# Tabular Playground Series
>### Session1 ML 
    > * Supervisor: Eng.Ahmed Abu ELkher
    > * By: Hossam Saad

#### Get Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import LabelEncoder

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

### Data Cleansing

In [None]:
# Load the training set, the test set and the sample submission, in that order.
df_train, df_test, df_sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-feb-2021/train.csv',
        '../input/tabular-playground-series-feb-2021/test.csv',
        '../input/tabular-playground-series-feb-2021/sample_submission.csv',
    )
)

In [None]:
# Summary statistics of df_train with describe()'s default column selection.
df_train.describe()

In [None]:
# Same summary, restricted to the object-dtype columns of df_train.
df_train.describe(include=object)

In [None]:
def _split_columns(frame):
    """Return ``(object_columns, other_columns)`` of *frame* as two Indexes."""
    object_mask = frame.dtypes == 'object'
    return frame.columns[object_mask], frame.columns[~object_mask]


# Object-dtype columns are treated as categorical, everything else as continuous.
Categories_tr, Continuous_tr = _split_columns(df_train)
Categories_ts, Continuous_ts = _split_columns(df_test)

### Exploratory Data Analysis

In [None]:
def my_plot(feature):
    """Show a histogram of the ``df_train`` column *feature*, titled with its name."""
    column_values = df_train[feature]
    plt.hist(column_values)
    plt.title(feature)
    plt.show()


# One histogram per non-object column of the training frame.
for column_name in Continuous_tr:
    my_plot(column_name)

In [None]:
# Vertical box plot of the target column; outliers are drawn as blue circles.
outlier_style = {'markerfacecolor': 'b', 'marker': 'o'}
plt.figure(figsize=(8, 6))
plt.boxplot(
    df_train['target'],
    vert=True,
    showfliers=True,
    flierprops=outlier_style,
)
plt.title('Target Box Plot', size=16)
plt.show()

In [None]:
# Print both column groups, separated by a 100-character dashed rule.
report_template = 'Category Features: \n\n   {0} \n\n{2} \n\nContinuous Features: \n\n   {1}'
divider = '-'*100
print(report_template.format(Categories_tr, Continuous_tr, divider))

### Categorical Features Transformation

In [None]:
# Encode the categorical columns of BOTH frames with a single encoder fitted on
# the training data only. Fitting a second encoder on the test set would derive
# the category -> integer mapping from whatever values happen to occur there, so
# the same category could receive a different code than the one the model was
# trained on.
categorical_cols = list(Categories_tr)
if categorical_cols:
    # Categories that never occur in the training data are mapped to -1 instead
    # of raising at transform time.
    encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    df_train[categorical_cols] = encoder.fit_transform(df_train[categorical_cols])
    # Assumes df_test carries the same categorical columns as df_train, which the
    # later predict() call on df_test requires as well.
    df_test[categorical_cols] = encoder.transform(df_test[categorical_cols])

### Data Correlation

In [None]:
# Annotated heatmap of the pairwise column correlations of df_train.
plt.figure(figsize=(20, 15))
corr_matrix = df_train.corr()
sns.heatmap(corr_matrix, annot=True)

In [None]:
# Flatten the correlation matrix into one Series indexed by column pair,
# then order it from the most negative to the most positive value.
pairwise_corr = df_train.corr().unstack()
feature_corrs = pairwise_corr.sort_values()
feature_corrs

In [None]:
# Keep pairs with 0.5 < |corr| < 1 (the upper bound drops the diagonal) and
# print each distinct correlation value once.
corr_magnitude = feature_corrs.abs()
is_strong = (corr_magnitude > 0.5) & (corr_magnitude < 1)
print(feature_corrs[is_strong].drop_duplicates())

In [None]:
# Same 0.5 < |corr| < 1 selection as a DataFrame; show the column pairs it covers.
strong_pairs = feature_corrs[(feature_corrs.abs() > 0.5) & (feature_corrs.abs() < 1)]
high_corr_df = pd.DataFrame(strong_pairs.drop_duplicates())
high_corr_df.index

In [None]:
# Regression target as a single-column DataFrame.
Label = df_train['target'].to_frame()

In [None]:
# Feature matrix: every column of df_train except the target.
train = df_train.drop(columns=['target'])
train

### Splitting Data

In [None]:
# Hold out 40% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    train,
    Label,
    test_size=0.4,
    random_state=30,
)

### Apply Linear Regression

In [None]:
# Fit ordinary least squares on the training split and predict the held-out rows.
lr_model = LinearRegression().fit(x_train, y_train)
y_predic = lr_model.predict(x_test)
y_predic

In [None]:
# Hold-out error of the linear model, reported to four decimals.
mae = mean_absolute_error(y_test, y_predic)
rmse = np.sqrt(mean_squared_error(y_test, y_predic))

# Built-in round() works whether the metric comes back as a NumPy scalar or as
# a plain Python float; the .round() method exists only on NumPy scalars.
print('MAE = ', round(mae, 4))
print('RMSE = ', round(rmse, 4))

In [None]:
# Same hold-out metrics, printed at full precision.
holdout_mse = mean_squared_error(y_test, y_predic)
actual_mae = mean_absolute_error(y_test, y_predic)
actual_rmse = np.sqrt(holdout_mse)
print('Actual MAE = ', actual_mae)
print('Actual RMSE = ', actual_rmse)

In [None]:
# Predict the target for the competition test frame with the fitted model.
# NOTE(review): df_test is passed whole, so its columns have to line up with
# the columns of `train` the model was fitted on — confirm the two frames match.
test_pred = lr_model.predict(df_test)
test_pred

In [None]:
# Assemble the submission: one row per test id with its predicted target.
outputs = pd.DataFrame()
outputs['Id'] = df_test['id']
outputs['target'] = test_pred
# NOTE(review): the header is written as 'Id' while the test column is 'id',
# and df_sub is loaded but never consulted — confirm the expected header.
# 'Id' is emitted as the first CSV column; the positional index is left out.
outputs.to_csv('Xsubmission.csv', index=False)