# In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Load the dataset
# The path is relative, so it is resolved against the current working directory.
data = pd.read_csv(filepath_or_buffer='Dataset1.csv')

# KDE plots for distribution of grades in each period.
# One panel per period (1-4), arranged on a 2x2 grid in a single figure.
plt.figure(figsize=(12, 8))
for i in range(1, 5):
    # Subplot indices are 1-based, so the period number doubles as the panel index.
    plt.subplot(2, 2, i)
    # `fill=True` is the supported spelling of the deprecated `shade=True`;
    # it shades the area under the density curve.
    sns.kdeplot(data[f'Period {i} Grade'], fill=True)
    plt.title(f'Distribution of Period {i} Grades')

# Adjust panel spacing so titles and tick labels do not overlap.
plt.tight_layout()
plt.show()

# Box plot for comparing grades in each period
# The four grade columns are drawn side by side on one set of axes.
period_grade_columns = [
    'Period 1 Grade',
    'Period 2 Grade',
    'Period 3 Grade',
    'Period 4 Grade',
]
plt.figure(figsize=(10, 6))
# seaborn returns the Axes it drew on, so the labels are set directly on it.
sns.boxplot(data=data[period_grade_columns]).set(
    title='Comparison of Grades in Each Period',
    xlabel='Period',
    ylabel='Grade',
)
plt.show()

# Histogram for age distribution
# Ten bins, with a kernel density estimate overlaid on the bars.
plt.figure(figsize=(8, 6))
sns.histplot(data['Age'], kde=True, bins=10).set(
    title='Age Distribution of Students',
    xlabel='Age',
    ylabel='Count',
)
plt.show()

# Count plot for gender distribution
# One bar per distinct value of the 'Gender' column; bar height is the row count.
plt.figure(figsize=(8, 6))
sns.countplot(x='Gender', data=data).set(
    title='Gender Distribution of Students',
    xlabel='Gender',
    ylabel='Count',
)
plt.show()

# Feature selection
# Predictor columns taken from the dataset, one name per line.
features = [
    'Age',
    'Family Size',
    "Parent's Education",
    "Student's Education",
    'Social Factor 1',
    'Social Factor 2',
    'College Related Feature 1',
    'College Related Feature 2',
]
# get_dummies one-hot encodes the non-numeric columns of the selection and
# leaves numeric columns as they are.
X = pd.get_dummies(data=data[features])
# Target variable
y = data['Period 4 Grade']

# Train-test split
# 80% of the rows go to training and 20% to testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Feature scaling
# The scaler is fitted on the training split only, then the same fitted
# transformation is applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model training and evaluation
# Each candidate regressor is fitted on the scaled training split and scored
# by mean squared error on the scaled test split.
models = {
    'Linear Regression': LinearRegression(),
    # The tree ensembles are randomised. They are seeded with the same value
    # as the train/test split so that rerunning the script reproduces the
    # same error figures.
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosted': GradientBoostingRegressor(random_state=42),
    'SVR': SVR()
}

# Maps each model's display name to its MSE on the test split.
results = {}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    results[name] = mse

# Plotting results
# One bar per entry of `results`: the key labels the bar, the value sets its height.
plt.figure(figsize=(10, 6))
plt.bar(list(results.keys()), list(results.values()), color='skyblue')
plt.gca().set(
    title='Mean Squared Error of Different Models',
    xlabel='Model',
    ylabel='Mean Squared Error',
)
# Tilt the tick labels on the x axis by 45 degrees.
plt.xticks(rotation=45)
plt.show()

# Analysis of results
# Print a header, then one line per entry of `results`.
print("Analysis of Results:")
# Here `model` is the key of `results` (a display name), not an estimator object.
for model in results:
    mse = results[model]
    print(f"{model}: Mean Squared Error = {mse}")
