# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Loading Dataset

In [None]:
# Read the CSV into `df` and preview the first rows.
csv_path = "../input/student-alcohol-consumption/student-mat.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Summary statistics with pandas' default column selection.
df.describe()

In [None]:
# Summary of the object-dtype columns only.
df.describe(include='O')

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

# Checking null Values

In [None]:
# Number of missing values in each column (`isna` is the same check as `isnull`).
df.isna().sum()

# Exploratory Data Analysis

In [None]:
# Row counts per `school`, with bars split by `sex`.
# Variables are passed by keyword: recent seaborn releases no longer accept
# the x-variable as the first positional argument (that slot is `data`).
sns.countplot(data=df, x='school', hue='sex')

In [None]:
# Row counts per `sex`, with bars split by `age`.
# Variables are passed by keyword: recent seaborn releases no longer accept
# the x-variable as the first positional argument (that slot is `data`).
sns.countplot(data=df, x='sex', hue='age')

In [None]:
# Row counts per `sex`, with bars split by `famsize`.
# Variables are passed by keyword: recent seaborn releases no longer accept
# the x-variable as the first positional argument (that slot is `data`).
sns.countplot(data=df, x='sex', hue='famsize')

In [None]:
# Row counts per `Pstatus`, with bars split by `famsize`.
# Variables are passed by keyword: recent seaborn releases no longer accept
# the x-variable as the first positional argument (that slot is `data`).
sns.countplot(data=df, x='Pstatus', hue='famsize')

In [None]:
# Row counts per `Medu`, with bars split by `famsize`.
# Variables are passed by keyword: recent seaborn releases no longer accept
# the x-variable as the first positional argument (that slot is `data`).
sns.countplot(data=df, x='Medu', hue='famsize')

In [None]:
# Row counts per `Fedu`, with bars split by `famsize`.
# Variables are passed by keyword: recent seaborn releases no longer accept
# the x-variable as the first positional argument (that slot is `data`).
sns.countplot(data=df, x='Fedu', hue='famsize')

In [None]:
# Row counts for each distinct `G1` value.
# The column is passed by keyword: recent seaborn releases no longer accept
# the x-variable as the first positional argument (that slot is `data`).
sns.countplot(data=df, x='G1')

In [None]:
# Row counts for each distinct `G2` value.
# The column is passed by keyword: recent seaborn releases no longer accept
# the x-variable as the first positional argument (that slot is `data`).
sns.countplot(data=df, x='G2')

# Correlation

In [None]:
# Annotated heatmap of pairwise correlations between the numeric columns.
# The numeric columns are selected explicitly: at this point `df` still holds
# string columns, and pandas >= 2.0 raises on `df.corr()` instead of silently
# dropping them as older versions did.
plt.figure(figsize=(15,10))
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True)

In [None]:
# Preview the first five rows before any encoding is applied.
df.head(n=5)

In [None]:
# Preprocessing and One-Hot Encoding

In [None]:
# Summary of the object-dtype columns that the following cells encode.
df.describe(include='O')

In [None]:
# Prepend a column-specific tag to every label of the multi-valued categorical
# columns, so the dummy columns derived from them later get distinct names.
# NOTE: not idempotent; re-running this cell prepends the tag again.
label_prefixes = {
    'Mjob': 'm_',
    'Fjob': 'f_',
    'reason': 'reason_',
    'guardian': 'gurd_',
}
for column_name, label_prefix in label_prefixes.items():
    df[column_name] = df[column_name].map(lambda value, p=label_prefix: p + value)

In [None]:
# Columns replaced in place by their dummy encoding with the first level
# dropped. Presumably each holds exactly two distinct values, so a single
# indicator column remains -- verify against the data.
binary_columns = [
    'school', 'sex', 'address', 'Pstatus', 'famsize',
    'schoolsup', 'famsup', 'paid', 'activities',
    'nursery', 'higher', 'internet', 'romantic',
]
for binary_col in binary_columns:
    df[binary_col] = pd.get_dummies(df[binary_col], drop_first=True)

# Multi-valued columns: keep the dummy frames separate (first level dropped);
# they are joined onto `df` in a later cell.
mjob = pd.get_dummies(df['Mjob'], drop_first=True)
fjob = pd.get_dummies(df['Fjob'], drop_first=True)
reason = pd.get_dummies(df['reason'], drop_first=True)
gurdian = pd.get_dummies(df['guardian'], drop_first=True)

In [None]:
# Append the dummy frames to `df` side by side (column-wise).
# NOTE: re-running this cell appends the same dummy columns a second time.
dummy_frames = [mjob, fjob, reason, gurdian]
df = pd.concat([df, *dummy_frames], axis='columns')

In [None]:
# Remove the original string columns now that their dummy columns are in `df`.
df = df.drop(columns=['Mjob', 'Fjob', 'reason', 'guardian'])

In [None]:
# Preview the first five rows of the encoded frame.
df.head(n=5)

# separate X and y

In [None]:
# `G3` is the prediction target; every other column is a feature.
target_column = 'G3'
y = df[target_column]
X = df.drop(columns=[target_column])

# Scaling the Data using standard scaler

In [None]:
from sklearn.preprocessing import StandardScaler
# Scaler with default settings; it is fitted in the next cell.
scaler = StandardScaler()

In [None]:
# Standardise every feature and rebuild a DataFrame around the resulting array.
# The original index is passed through so X stays label-aligned with `y`;
# without it the frame gets a fresh 0..n-1 index.
# NOTE(review): the scaler is fitted on the full data set before the train/test
# split, so test-set statistics leak into training. Fitting on X_train only
# (e.g. via a Pipeline) would avoid that but requires reordering cells.
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

In [None]:
# Preview the first five rows of the scaled feature frame.
X.head(n=5)

In [None]:
# splitting the dataset
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for evaluation.
# A fixed random_state makes the split (and every score below) reproducible
# across Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Training Models

# Using Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression,Ridge
# Unfitted estimators with default hyper-parameters: `LR` is ordinary linear
# regression, `R` is ridge regression.
LR, R = LinearRegression(), Ridge()

In [None]:
# Fit the linear regression on the training split.
LR.fit(X=X_train, y=y_train)

In [None]:
# Score of the linear regression on the held-out split
# (scikit-learn regressors report R^2 from `score`).
lr_test_score = LR.score(X_test, y_test)
print(lr_test_score)

In [None]:
# Linear-regression predictions for the held-out rows.
y_predLR = LR.predict(X=X_test)

In [None]:
from sklearn import metrics
# Error metrics for the linear regression on the held-out split.
# The MSE is computed once and reused for the RMSE.
lr_mae = metrics.mean_absolute_error(y_test, y_predLR)
lr_mse = metrics.mean_squared_error(y_test, y_predLR)
print('MAE:', lr_mae)
print('MSE:', lr_mse)
print('RMSE:', np.sqrt(lr_mse))

In [None]:
# Actual target values (x-axis) against linear-regression predictions (y-axis).
plt.scatter(x=y_test, y=y_predLR)

In [None]:
# Distribution of the linear-regression residuals (actual minus predicted).
# `sns.distplot` is deprecated and scheduled for removal; a density-scaled
# `histplot` with a KDE overlay is its documented replacement.
sns.histplot(y_test - y_predLR, kde=True, stat='density')

# Using Ridge Regression

In [None]:
# Fit the ridge regression on the training split.
R.fit(X=X_train, y=y_train)

In [None]:
# Score of the ridge regression on the held-out split
# (scikit-learn regressors report R^2 from `score`).
ridge_test_score = R.score(X_test, y_test)
print(ridge_test_score)

In [None]:
# Ridge-regression predictions for the held-out rows.
y_predR = R.predict(X=X_test)

In [None]:
from sklearn import metrics
# Error metrics for the ridge regression on the held-out split.
# The MSE is computed once and reused for the RMSE.
ridge_mae = metrics.mean_absolute_error(y_test, y_predR)
ridge_mse = metrics.mean_squared_error(y_test, y_predR)
print('MAE:', ridge_mae)
print('MSE:', ridge_mse)
print('RMSE:', np.sqrt(ridge_mse))

In [None]:
# Actual target values (x-axis) against ridge-regression predictions (y-axis).
plt.scatter(x=y_test, y=y_predR)

In [None]:
# Distribution of the ridge-regression residuals (actual minus predicted).
# `sns.distplot` is deprecated and scheduled for removal; a density-scaled
# `histplot` with a KDE overlay is its documented replacement.
sns.histplot(y_test - y_predR, kde=True, stat='density')

# Using Random Forest Regression and Extra Trees Regression

In [None]:
from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor

In [None]:
# Unfitted tree ensembles with default hyper-parameters.
# Both estimators are randomised; a fixed random_state makes their fitted
# trees, and therefore the reported scores, reproducible between runs.
RFR = RandomForestRegressor(random_state=42)
ETR = ExtraTreesRegressor(random_state=42)

In [None]:
# Fit the random forest on the training split.
RFR.fit(X=X_train, y=y_train)

In [None]:
# Score of the random forest on the held-out split
# (scikit-learn regressors report R^2 from `score`).
rfr_test_score = RFR.score(X_test, y_test)
print(rfr_test_score)

In [None]:
# Random-forest predictions for the held-out rows.
y_predRFR = RFR.predict(X=X_test)

In [None]:
from sklearn import metrics
# Error metrics for the random forest on the held-out split.
# The MSE is computed once and reused for the RMSE.
rfr_mae = metrics.mean_absolute_error(y_test, y_predRFR)
rfr_mse = metrics.mean_squared_error(y_test, y_predRFR)
print('MAE:', rfr_mae)
print('MSE:', rfr_mse)
print('RMSE:', np.sqrt(rfr_mse))

In [None]:
# Actual target values (x-axis) against random-forest predictions (y-axis).
plt.scatter(x=y_test, y=y_predRFR)

In [None]:
# Distribution of the random-forest residuals (actual minus predicted).
# `sns.distplot` is deprecated and scheduled for removal; a density-scaled
# `histplot` with a KDE overlay is its documented replacement.
sns.histplot(y_test - y_predRFR, kde=True, stat='density')

In [None]:
# Fit the extra-trees ensemble on the training split.
ETR.fit(X=X_train, y=y_train)

In [None]:
# Score of the extra-trees ensemble on the held-out split
# (scikit-learn regressors report R^2 from `score`).
etr_test_score = ETR.score(X_test, y_test)
print(etr_test_score)

In [None]:
# Extra-trees predictions for the held-out rows.
# Must come from `ETR`: predicting with `RFR` here would make every
# extra-trees metric and plot below silently report the random forest instead.
y_predETR = ETR.predict(X_test)

In [None]:
from sklearn import metrics
# Error metrics computed from `y_predETR` on the held-out split.
# The MSE is computed once and reused for the RMSE.
etr_mae = metrics.mean_absolute_error(y_test, y_predETR)
etr_mse = metrics.mean_squared_error(y_test, y_predETR)
print('MAE:', etr_mae)
print('MSE:', etr_mse)
print('RMSE:', np.sqrt(etr_mse))

In [None]:
# Actual target values (x-axis) against the `y_predETR` predictions (y-axis).
plt.scatter(x=y_test, y=y_predETR)

In [None]:
# Distribution of the residuals for `y_predETR` (actual minus predicted).
# `sns.distplot` is deprecated and scheduled for removal; a density-scaled
# `histplot` with a KDE overlay is its documented replacement.
sns.histplot(y_test - y_predETR, kde=True, stat='density')