Load the dataset from a CSV file.

In [None]:
import pandas as pd

# Read the CSV into the DataFrame that the rest of the notebook calls
# `dataset`. The relative path is resolved against the working directory.
csv_path = 'data.csv'
dataset = pd.read_csv(csv_path)

Preprocess the data by dropping every row that contains a missing value.

In [None]:
# Drop every row that has at least one missing value. The result is bound
# back to `dataset` rather than using inplace=True, so the step stays a
# plain expression (chainable) and no frame is mutated in place.
dataset = dataset.dropna()

Inspect the data to understand its structure and summary statistics.

In [None]:
# Compute pandas' default descriptive statistics and print them as text.
summary_stats = dataset.describe()
print(summary_stats)

Visualize the correlations between features in the dataset.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Correlation is only defined for numeric columns. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns by default and raises
# if the frame holds any (e.g. a text label column), so select them explicitly.
numeric_features = dataset.select_dtypes(include='number')
correlations = numeric_features.corr()

# Draw on an explicit axes so the heatmap can be titled and reused.
fig, ax = plt.subplots()
sns.heatmap(correlations, annot=True, ax=ax)
ax.set_title('Feature Correlations')
plt.show()

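Fit an ordinary least squares regression of happiness on the selected factors, then draw an influence plot (studentized residuals versus leverage) to spot observations that disproportionately affect the fit.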

In [None]:
import statsmodels.api as sm

# Explanatory variables used for the regression.
factor_columns = ['GDP', 'Family', 'Life_Expectancy', 'Freedom', 'Corruption']
factors = dataset[factor_columns]

# Add the intercept column, fit OLS with 'Happiness' as the response, and
# pass the fitted results to statsmodels' influence plot.
design_matrix = sm.add_constant(factors)
ols_results = sm.OLS(dataset['Happiness'], design_matrix).fit()
sm.graphics.influence_plot(ols_results)

Split the dataset into training and test sets for modeling.

In [None]:
from sklearn.model_selection import train_test_split

# Predictors (X) and response (y) taken from the dataset.
feature_columns = ['GDP', 'Family', 'Life_Expectancy', 'Freedom', 'Corruption']
X = dataset[feature_columns]
y = dataset['Happiness']

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Train a linear regression model using the training data.

In [None]:
from sklearn.linear_model import LinearRegression

# Linear regressor with scikit-learn's default settings, fitted on the
# training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

Generate predictions using the trained model on the test set.

In [None]:
# Model output for the test-set features; compared against y_test later.
predictions = model.predict(X=X_test)

Evaluate the model's performance using Mean Squared Error.

In [None]:
from sklearn.metrics import mean_squared_error

# Mean of the squared differences between actual and predicted test values.
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
print(f'Mean Squared Error: {mse}')

Visualize the predictions against the true happiness values.

In [None]:
# One point per test row: actual value on x, model prediction on y.
fig, ax = plt.subplots()
ax.scatter(y_test, predictions)
# Red y = x reference line; a point on it was predicted exactly.
ax.plot(y_test, y_test, color='red')
ax.set_xlabel('True Values')
ax.set_ylabel('Predictions')
ax.set_title('Predictions vs True Values')
plt.show()