### Importing the Libraries

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

In [13]:
# Read the CSV at this relative path into a DataFrame.
data = pd.read_csv("../input/student-alcohol-consumption/student-mat.csv")

# Bare last expression: the notebook renders the frame as the cell output.
data

### Data Visualization

In [14]:
# Correlation heatmap of the numeric columns.
# The frame still contains string (object) columns at this point, and
# DataFrame.corr() raises on non-numeric data in pandas >= 2.0 (older versions
# silently dropped such columns), so select the numeric columns explicitly.
numeric_corr = data.select_dtypes(include="number").corr()

plt.figure(figsize=(14, 12))
sns.heatmap(numeric_corr, annot=True)  # annot=True writes each coefficient into its cell
plt.title("Pairwise correlation of numeric columns")
plt.show()

### Preprocessing

#### Checking for missing values

In [15]:
# Count the missing entries in each column (isnull is pandas' alias of isna).
data.isnull().sum()

#### Encoding

In [16]:
# List the dtype of every column to see which ones are non-numeric.
data.dtypes

In [17]:
# Names of the columns stored with the generic object dtype ('O').
nonnumeric_columns = [name for name, dtype in data.dtypes.items() if dtype == 'O']

nonnumeric_columns

In [18]:
# Show the distinct values of each object-dtype column, one column per line.
for name in nonnumeric_columns:
    distinct_values = data[name].unique()
    print(f"{name}: {distinct_values}")


In [19]:
# Tag the values of each multi-category column with a short prefix naming the
# column they came from. The one-hot encoding performed later names its
# indicator columns after these values, so the prefix keeps indicators from
# different source columns distinguishable.
# Driving this from a single mapping guarantees each column gets exactly one
# prefix ("Mjob" -> "m_", "Fjob" -> "f_", ...).
# NOTE: not idempotent -- re-running this cell prepends the prefix again.
value_prefixes = {"Mjob": "m_", "Fjob": "f_", "reason": "r_", "guardian": "g_"}

for column, prefix in value_prefixes.items():
    # str + Series concatenates element-wise.
    data[column] = prefix + data[column]

In [20]:
# Display the frame after the prefixing step for a visual check.
data

In [22]:
# One-hot encode each of the four multi-category columns and place the
# resulting indicator frames side by side (axis=1), in this column order.
dummies = pd.concat(
    [pd.get_dummies(data[name]) for name in ("Mjob", "Fjob", "reason", "guardian")],
    axis=1,
)

In [23]:
# Display the combined indicator frame.
dummies

In [24]:
# Append the one-hot indicator columns, then remove the four source columns
# they replace. drop() raises KeyError for a label that does not exist, so
# every name listed here must match a column exactly.
data = pd.concat([data, dummies], axis=1)

# Reassign instead of using inplace=True so the cell reads as a plain
# "old frame -> new frame" step.
data = data.drop(columns=["Mjob", "Fjob", "reason", "guardian"])

In [25]:
# Display the frame after the concat/drop step.
data

In [33]:
# Recompute the object-dtype column names from the current frame, then list
# the distinct values of each one.
non_numeric_columns = [name for name, dtype in data.dtypes.items() if dtype == 'O']

for name in non_numeric_columns:
    distinct_values = data[name].unique()
    print(f"{name}: {distinct_values}")

In [34]:
# Label-encode every column that is still non-numeric, replacing its values
# with integer codes.
# Iterates `non_numeric_columns` (recomputed from the current frame) rather
# than the older `nonnumeric_columns`, which was built before the one-hot step
# that removes some of the columns it names.
encoder = LabelEncoder()

for column in non_numeric_columns:
    # fit_transform refits the encoder on each column, so one instance can be
    # reused; only the last column's mapping remains stored on `encoder`.
    data[column] = encoder.fit_transform(data[column])

In [35]:
# Print the dtype of each column, one per line (e.g. to check that no object
# columns remain after encoding).
for column_dtype in data.dtypes.tolist():
    print(column_dtype)

### Scaling

In [36]:
# Separate the target column "G3" from the remaining columns, which become
# the feature matrix.
X = data.drop(columns="G3")
y = data["G3"]

In [37]:
# Display the target Series.
y

In [38]:
# Display the feature frame.
X

In [39]:
# Standardize every feature column and rebuild a DataFrame so the column
# names survive (the scaler itself returns a bare array). The new frame gets
# a fresh default index because none is passed.
# NOTE(review): the scaler is fitted on all rows here, before the train/test
# split performed later, so test rows influence the scaling statistics --
# consider fitting on the training split only.
scaler = StandardScaler().fit(X)

X = pd.DataFrame(scaler.transform(X), columns=X.columns)

In [40]:
# Display the feature frame after scaling.
X

### Training

In [41]:
# Keep 70% of the rows for training and hold out the rest for evaluation.
# A fixed random_state makes the shuffle -- and therefore the reported score --
# identical on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=42
)

In [42]:
# Fit an ordinary least-squares linear model on the training split. The fit
# call is the last expression, so the fitted estimator is shown as the output.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

#### Results

In [43]:
# For regressors, .score() returns the coefficient of determination (R^2);
# it is evaluated here on the held-out test split.
test_r2 = model.score(X_test, y_test)
print(f"Model R2: {test_r2}")