In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Read the student-performance CSV into a DataFrame.
file_path = '/content/Student_performance_data _.csv'  # Update the path accordingly
df = pd.read_csv(file_path)

# Preview the top of the loaded table (header line and frame share one print call).
print("First few rows of the DataFrame:", df.head(), sep="\n")

# Keep only the rows whose GPA is strictly above 3.0; rows with a missing GPA
# compare as False and are therefore excluded as well.
high_gpa_mask = df['GPA'].gt(3.0)
filtered_df = df.loc[high_gpa_mask]
print("\nFiltered DataFrame (GPA > 3.0):", filtered_df.head(), sep="\n")

# Handle missing values: report how many nulls each column has, then fill the
# gaps in every numeric column with that column's mean.
missing_values = df.isnull().sum()
print("\nMissing values in the DataFrame:")
print(missing_values)
# numeric_only=True restricts the means to numeric columns, so this step does not
# raise TypeError if the CSV contains a text column (pandas >= 2.0 no longer drops
# such columns silently). Non-numeric columns are left unfilled.
# The result is rebound to `df` rather than mutated with inplace=True.
# NOTE(review): the means are computed over the full table (target 'GPA' included)
# before any train/test split — confirm this is acceptable for the analysis.
df = df.fillna(df.mean(numeric_only=True))

# Descriptive statistics for the table, as produced by DataFrame.describe().
summary_statistics = df.describe()
# A single print with a newline separator emits the header and the table on
# consecutive lines.
print("\nSummary statistics of the DataFrame:", summary_statistics, sep="\n")

# Machine Learning: fit a linear regression that predicts GPA

# Predictor columns and the regression target
feature_columns = [
    'Age', 'ParentalEducation', 'StudyTimeWeekly', 'Absences', 'Tutoring',
    'ParentalSupport', 'Extracurricular', 'Sports', 'Music', 'Volunteering',
]
X = df[feature_columns]
y = df['GPA']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained
model = LinearRegression().fit(X_train, y_train)

# Predict GPA for the held-out rows
y_pred = model.predict(X_test)

# Score the held-out predictions with mean squared error
mse = mean_squared_error(y_test, y_pred)
print("\nMean Squared Error of the model:", mse)

# Example prediction: one hypothetical student, with one value per feature the
# model was trained on (keys are listed in the same order as the training columns).
example_student = {
    'Age': 17,
    'ParentalEducation': 2,
    'StudyTimeWeekly': 10,
    'Absences': 5,
    'Tutoring': 1,
    'ParentalSupport': 3,
    'Extracurricular': 1,
    'Sports': 0,
    'Music': 1,
    'Volunteering': 0,
}
# A one-element list of records yields a single-row DataFrame.
example_data = pd.DataFrame([example_student])

predicted_gpa = model.predict(example_data)
print("\nPredicted GPA for the example data:", predicted_gpa[0])


First few rows of the DataFrame:
   StudentID  Age  Gender  Ethnicity  ParentalEducation  StudyTimeWeekly  \
0       1001   17       1          0                  2        19.833723   
1       1002   18       0          0                  1        15.408756   
2       1003   15       0          2                  3         4.210570   
3       1004   17       1          0                  3        10.028829   
4       1005   17       1          0                  2         4.672495   

   Absences  Tutoring  ParentalSupport  Extracurricular  Sports  Music  \
0         7         1                2                0       0      1   
1         0         0                1                0       0      0   
2        26         0                2                0       0      0   
3        14         0                3                1       0      0   
4        17         1                3                0       0      0   

   Volunteering       GPA  GradeClass  
0             0  2.929196