In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd  #data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sn
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk /kaggle/input recursively and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the test-scores CSV from the Kaggle input mount into `df`.
df=pd.read_csv("/kaggle/input/predict-test-scores-of-students/test_scores.csv")

In [None]:
# Preview the first rows of the loaded frame.
df.head()


# **Exploratory analysis**

In [None]:
# Summary statistics for the columns of `df`.
df.describe()

In [None]:
# Use seaborn's "whitegrid" style for the plots drawn after this cell.
sn.set_style(style="whitegrid")

In [None]:
# Bar chart of posttest score by school type.
sn.barplot(data=df, x="school_type", y="posttest")
plt.show()

In [None]:
# Bar chart of posttest score by teaching method.
sn.barplot(data=df, x="teaching_method", y="posttest")
plt.show()

In [None]:
# Bar chart of posttest score by teaching method, with one bar per school type.
sn.barplot(data=df, x="teaching_method", y="posttest", hue="school_type")
plt.show()

In [None]:
# Box plot of the posttest distribution for each teaching method.
sn.boxplot(data=df, x="teaching_method", y="posttest")
plt.show()

In [None]:
# Split the rows into one frame per teaching method.
experimental = df.loc[df["teaching_method"] == "Experimental"]
standar = df.loc[df["teaching_method"] == "Standard"]

In [None]:
# Posttest distribution for the Experimental group.
# sn.distplot is deprecated in seaborn; histplot with kde=True and
# stat="density" draws the equivalent density-scaled histogram with a KDE curve.
plt.subplots(1,1,figsize=(10,8))
sn.histplot(experimental.posttest,kde=True,stat="density")
plt.show()

In [None]:
# Posttest distribution for the Standard group.
# sn.distplot is deprecated in seaborn; histplot with kde=True and
# stat="density" draws the equivalent density-scaled histogram with a KDE curve.
plt.subplots(1,1,figsize=(10,8))
sn.histplot(standar.posttest,kde=True,stat="density")
plt.show()

In [None]:
# Correlation heatmap restricted to the numeric columns: `df` also holds string
# columns (e.g. teaching_method, school_type), and DataFrame.corr() raises on
# those in pandas >= 2.0 instead of silently dropping them.
plt.subplots(1,1,figsize=(10,8))
sn.heatmap(df.select_dtypes(include="number").corr(),
           annot=True,
           cmap="cool")
plt.show()

# **Feature engineering**

In [None]:
def lower_upper(dataframe, lower_q=0.1, upper_q=0.9999):
    """Return integer lower/upper cut-offs for the ``posttest`` column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame exposing a numeric ``posttest`` column.
    lower_q, upper_q : float, optional
        Quantiles (between 0 and 1) used for the lower and upper cut-off.
        The defaults reproduce the original hard-coded 0.1 / 0.9999.

    Returns
    -------
    tuple[int, int]
        ``(lower, upper)`` with both quantile values truncated by ``int()``.

    Raises
    ------
    ValueError
        If the quantiles are not ordered within [0, 1], or (from ``int()``)
        if a quantile is NaN, e.g. for an empty ``posttest`` column.
    """
    if not 0 <= lower_q <= upper_q <= 1:
        raise ValueError("expected 0 <= lower_q <= upper_q <= 1")

    scores = dataframe.posttest
    lower = scores.quantile(lower_q)
    upper = scores.quantile(upper_q)

    # int() truncates toward zero, so the cut-offs are whole scores.
    return int(lower), int(upper)

In [None]:
# Compute the (lower, upper) posttest cut-offs separately for each teaching-method group.
standar_lower_upper=lower_upper(standar)
experimental_lower_upper=lower_upper(experimental)

# Display the Standard group's bounds as the cell output.
standar_lower_upper

In [None]:
# Keep the Experimental rows whose posttest lies strictly between the group's bounds.
exp_low, exp_high = experimental_lower_upper
exp_in_range = (experimental.posttest > exp_low) & (experimental.posttest < exp_high)
experimental_not_out = experimental[exp_in_range]
                                   

In [None]:
# Keep the Standard rows whose posttest lies strictly between the group's bounds.
std_low, std_high = standar_lower_upper
std_in_range = (standar.posttest > std_low) & (standar.posttest < std_high)
standar_not_out = standar[std_in_range]

In [None]:
# Stack the two filtered groups back into one frame (Experimental rows first, original index labels kept).
df_not_out=pd.concat([experimental_not_out,standar_not_out])

In [None]:
# Box plot of posttest per teaching method after filtering.
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the former positional call raises a TypeError there.
sn.boxplot(x="teaching_method",y="posttest",data=df_not_out)
plt.show()

In [None]:
from sklearn.model_selection import train_test_split
# Display the filtered frame as the cell output.
df_not_out

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Scaler instance used below to standardise the feature matrix.
scala=StandardScaler()

# **Sklearn Linear Regression**

In [None]:
# Select the predictor columns and one-hot encode the categorical ones.
feature_columns = ["teaching_method", "school_type", "school_setting", "pretest"]
data = pd.get_dummies(df_not_out[feature_columns])

In [None]:
# Standardise the encoded features.
# NOTE(review): the scaler is fitted on every row here, before the train/test
# split performed further down, so held-out rows contribute to the scaling
# statistics — consider fitting on the training rows only.
data_scala=scala.fit_transform(data)

In [None]:
# Target as a 2-D column array of shape (n_rows, 1).
target=np.array(df_not_out.posttest).reshape(-1,1)

In [None]:
# Sanity check: features and target should have the same number of rows.
data.shape,target.shape

In [None]:
# Hold out 20% of the rows for evaluation; random_state=0 makes the split reproducible.
X_train,X_test,Y_train,Y_test=train_test_split(data_scala,
                                               target,test_size=0.2,
                                               random_state=0)

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least-squares regressor with scikit-learn's default settings.
reg_lineal=LinearRegression()

In [None]:
# Fit the regressor on the training split.
reg_lineal.fit(X_train,Y_train)

In [None]:
# R² of the fitted regressor on the held-out split.
reg_lineal.score(X_test,Y_test)

In [None]:
# Predictions for the held-out rows; reused later for the MAE, the scatter plot and the R² comparison.
y_pred=reg_lineal.predict(X_test)

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
# Mean absolute error of the linear regression on the held-out split (target is unscaled here).
mean_absolute_error(Y_test,y_pred)

# **Keras**

In [None]:
import tensorflow.keras as kr

In [None]:
# Split for the Keras model.
# - The target gets its own scaler: refitting `scala` here would overwrite the
#   statistics it learned from the feature matrix.
# - test_size and random_state match the scikit-learn split above, so both
#   models are evaluated on the same held-out rows and the run is reproducible.
target_scala=StandardScaler()
xtrain,xtest,ytrain,ytest=train_test_split(data_scala,
                                           target_scala.fit_transform(target),
                                           test_size=0.2,
                                           random_state=0)

In [None]:
def model():
    """Build the uncompiled feed-forward regression network.

    Architecture: Dense(20, relu) -> Dense(10, relu) -> Dense(10, relu)
    -> Dropout(0.2) -> Dense(1, linear). The input width is read from the
    global ``xtrain`` (its number of columns).

    Returns
    -------
    A ``kr.Sequential`` instance holding the layers above.
    """
    n_features=xtrain.shape[1]

    stack=[
        kr.layers.Dense(20,input_dim=n_features,activation="relu"),
        kr.layers.Dense(10,activation="relu"),
        kr.layers.Dense(10,activation="relu"),
        kr.layers.Dropout(0.2),
        kr.layers.Dense(1,activation="linear"),
    ]

    return kr.Sequential(stack)

In [None]:
# Build the network. NOTE: this rebinds the name `model` from the builder
# function to the built network, so the builder is no longer reachable under
# that name and re-running this cell without re-running the `def` cell fails.
model=model()

In [None]:
# Train with Adam on mean squared error and report mean absolute error.
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

In [None]:
# Stop training once validation MAE has not improved for 6 consecutive epochs,
# and roll the network back to the weights of its best epoch.
early_stop=kr.callbacks.EarlyStopping(monitor="val_mae",
                                      patience=6,
                                      restore_best_weights=True)

In [None]:
# Fit for at most 20 epochs in mini-batches of 10, validating on the held-out
# split after each epoch; `history` keeps the per-epoch metrics for plotting.
history = model.fit(
    xtrain,
    ytrain,
    epochs=20,
    batch_size=10,
    validation_data=(xtest, ytest),
    callbacks=[early_stop],
)

In [None]:
def loss_metrics():
    """Plot the per-epoch training curves stored in the global ``history``.

    Draws two figures from ``history.history``: MAE ("mae" / "val_mae") and
    loss ("loss" / "val_loss"). The un-prefixed series are measured on the
    training data and the "val_" series on the ``validation_data`` passed to
    ``fit`` (the test split in this notebook). Returns nothing.
    """
    curves=history.history

    plt.subplots(1,1,figsize=(20,8))
    plt.title("MAE")
    plt.plot(curves["mae"])
    plt.plot(curves["val_mae"])
    plt.xlabel("Epoch")
    plt.ylabel("MAE")
    # Labels follow the plotting order above: training curve first, then validation.
    plt.legend(["Train MAE","Test MAE"])
    plt.show()

    plt.subplots(1,1,figsize=(20,8))
    plt.title("LOSS")
    plt.plot(curves["loss"])
    plt.plot(curves["val_loss"])
    plt.xlabel("Epoch")
    plt.ylabel("loss")
    plt.legend(["Train loss","Test loss"])
    plt.show()


In [None]:
# Draw the MAE and loss curves recorded during training.
loss_metrics()

In [None]:
def main():
    """Scatter the linear regression's predictions against the true scores.

    Reads the globals ``Y_test`` (true posttest values) and ``y_pred``
    (predictions for the same rows). Each point is one held-out row, and the
    diagonal marks where a perfect prediction would fall. Returns nothing.
    """
    plt.figure(figsize=(12,8))
    plt.title("Real vs Predict")

    # x = real value, y = predicted value, matching the axis labels below.
    plt.scatter(Y_test,y_pred,c="b",label="Predictions")

    # Reference line y = x spanning the observed range of both arrays.
    lo=min(Y_test.min(),y_pred.min())
    hi=max(Y_test.max(),y_pred.max())
    plt.plot([lo,hi],[lo,hi],c="c",label="Perfect prediction (y = x)")

    plt.xlabel("Real")
    plt.ylabel("Predict")
    # Labels are attached to the artists, so the legend cannot get out of order.
    plt.legend()
    plt.show()

In [None]:
# Draw the real-vs-predicted scatter plot when this code runs as the top-level module.
if __name__ =="__main__":

    main()

In [None]:
from sklearn.metrics import r2_score

In [None]:
# (R² of the Keras network on xtest/ytest, R² of the linear regression on X_test/Y_test).
r2_score(ytest,model.predict(xtest)),r2_score(Y_test,y_pred)

**Scikit-learn wins!**