In [2]:
from pathlib import Path

import pandas as pd

# Load the dataset.
# The location lives in one named constant so it is easy to change, and the
# explicit check below reports the fully resolved path: a bare relative
# filename is looked up in the kernel's working directory, which is not
# necessarily the folder that contains this notebook.
DATA_PATH = Path('Companies_ranked_by_Dividend_Yield.csv')
if not DATA_PATH.is_file():
    raise FileNotFoundError(
        f"Dataset not found at '{DATA_PATH.resolve()}'. "
        "Place the CSV there or point DATA_PATH at its actual location."
    )

data = pd.read_csv(DATA_PATH)
data.head()

FileNotFoundError: [Errno 2] No such file or directory: 'Companies_ranked_by_Dividend_Yield.csv'

In [None]:
# Count the missing entries in every column and report the totals
null_mask = data.isna()
missing_values = null_mask.sum()
print(missing_values)

# Discard every row that has at least one missing value.
# Note: this modifies `data` itself rather than producing a new frame.
data.dropna(axis=0, how='any', inplace=True)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Univariate analysis: 30-bin histogram of the 'Dividend_Yield' column
# with a kernel density estimate drawn on top.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=data, x='Dividend_Yield', bins=30, kde=True, ax=ax)
ax.set(
    title='Distribution of Dividend Yield',
    xlabel='Dividend Yield',
    ylabel='Frequency',
)
plt.show()

In [None]:
# Bivariate analysis: scatter of 'Dividend_Yield' (vertical axis)
# against 'Earnings' (horizontal axis).
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x=data['Earnings'], y=data['Dividend_Yield'], ax=ax)
ax.set_title('Dividend Yield vs Earnings')
ax.set_xlabel('Earnings')
ax.set_ylabel('Dividend Yield')
plt.show()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Assemble the design matrix (three predictor columns) and the target column
feature_columns = ['Earnings', 'P_E_Ratio', 'Market_Cap']
X = data[feature_columns]
y = data['Dividend_Yield']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training rows (fit() returns the estimator)
linear_model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out rows and score those predictions
y_pred = linear_model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Degree-2 polynomial regression on the same predictors.
# include_bias=False: LinearRegression fits its own intercept, so the constant
# column that PolynomialFeatures adds by default would only duplicate it.
poly = PolynomialFeatures(degree=2, include_bias=False)

# Reuse the train/test split created for the linear model instead of splitting
# a second time. Both models are then evaluated on exactly the same held-out
# rows by construction, and y_train / y_test are no longer overwritten here.
# The transformer is fitted on the training rows only and then applied
# unchanged to the test rows.
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Fit the polynomial regression model (a linear model on the expanded features)
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)

# Predictions on the held-out rows
y_poly_pred = poly_model.predict(X_test_poly)

# Evaluate the polynomial model with the same metrics as the linear model
mse_poly = mean_squared_error(y_test, y_poly_pred)
r2_poly = r2_score(y_test, y_poly_pred)

print(f'Polynomial Mean Squared Error: {mse_poly}')
print(f'Polynomial R-squared: {r2_poly}')

In [None]:
# Collect the evaluation metrics of both models in one table
model_comparison = pd.DataFrame({
    'Model': ['Linear Regression', 'Polynomial Regression'],
    'MSE': [mse, mse_poly],
    'R-squared': [r2, r2_poly]
})

# Visualize the performance.
# MSE is expressed in squared units of the target and has no upper bound,
# whereas R-squared is unitless and never exceeds 1. Drawn on a single shared
# axis, one metric can flatten the other, so each gets its own panel and
# its own y-axis label.
fig, axes = plt.subplots(1, 2, figsize=(10, 6))
for ax, metric in zip(axes, ['MSE', 'R-squared']):
    model_comparison.plot(x='Model', y=metric, kind='bar', ax=ax, legend=False, rot=0)
    ax.set_title(metric)
    ax.set_ylabel(metric)
fig.suptitle('Model Performance Comparison')
fig.tight_layout()
plt.show()