# **Import Library**

In [1]:
# import all necessary packages
import pandas as pd
import numpy as np


# import plotting packages
import matplotlib.pyplot as plt
import seaborn as sns
# import ML related packages of sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import plotly.express as px
import plotly.graph_objects as go


# **Data Load**

In [2]:
# load data to a new dataframe
df = pd.read_csv("data_revisi.csv")

In [3]:
# describe the dataframe with some statistical info
df.describe()

Unnamed: 0,gender,umur,average_wpm,average_accuracy,average_second
count,99.0,99.0,99.0,99.0,99.0
mean,0.484848,21.464646,51.115152,0.917677,549.080404
std,0.502314,1.996492,11.4023,0.060133,137.734809
min,0.0,19.0,28.0,0.81,274.8
25%,0.0,20.0,41.225,0.86,425.43
50%,0.0,21.0,51.95,0.92,551.33
75%,1.0,22.0,58.26,0.97,646.555
max,1.0,27.0,79.69,1.0,935.54


# **Data Visualisasi**

In [4]:
figure = px.scatter(data_frame=df, x='average_wpm', y='average_second', size='average_second', trendline='ols')
figure.show()

# **Deklarasi Variabel**

In [5]:
average_wpm = df[['average_wpm']]
average_accuracy = df[['average_accuracy']]
average_second = df[['average_second']]
umur = df[['umur']]
gender = df[['gender']]
average_wpm

Unnamed: 0,average_wpm
0,60.33
1,41.00
2,52.33
3,38.67
4,28.00
...,...
94,66.13
95,34.09
96,50.09
97,44.58


# **STANDARISASI**
### *"average_accuracy"*

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
new_accuracy = pd.DataFrame(StandardScaler().fit_transform(average_accuracy), columns=average_accuracy.columns)
new_accuracy.head()

Unnamed: 0,average_accuracy
0,1.208842
1,0.540265
2,0.37312
3,-1.799757
4,1.375987


# **NORMALISASI** 
### _'average_wpm' dan 'average_second'_

In [8]:
from sklearn.preprocessing import MinMaxScaler

In [9]:
#Normalize average_wpm
new_wpm = pd.DataFrame(MinMaxScaler().fit_transform(average_wpm), columns=average_wpm.columns)
new_wpm.head()

Unnamed: 0,average_wpm
0,0.625459
1,0.251499
2,0.470691
3,0.206423
4,0.0


In [10]:
#Normalize average_second
new_second = pd.DataFrame(MinMaxScaler().fit_transform(average_second), columns=average_second.columns)
new_second.head()

Unnamed: 0,average_second
0,0.155583
1,0.354148
2,0.432243
3,0.069316
4,0.551806


# **Deklarasi Variabel *X* dan *Y***

In [12]:

X = pd.concat([new_accuracy, new_wpm, umur, gender], axis=1).values
y = new_second.values

In [13]:
# Bagi data menjadi data latih dan data uji
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=79)
    
# Buat model regresi linear
model = LinearRegression()
    
# Latih model pada data latih
model.fit(x_train, y_train)
    
# Lakukan prediksi pada data uji
y_pred = model.predict(x_test)
    
# Hitung metrik evaluasi
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)



In [14]:
# print y_pred
print(y_pred)

[[0.40380325]
 [0.44766825]
 [0.44749046]
 [0.43943832]
 [0.45125639]
 [0.39074292]
 [0.38428982]
 [0.45188012]
 [0.43089582]
 [0.3774111 ]
 [0.3948532 ]
 [0.45080742]
 [0.38697888]
 [0.39687659]
 [0.38924434]
 [0.46508769]
 [0.41915912]
 [0.42326406]
 [0.39411426]
 [0.39750021]]


In [15]:
# create a metrics dataframe using metric values
metrics = {
    'Model': ['First'],
    'MSE' : [mse],
    'RMSE' : [rmse],
    'MAE' : [mae],
    'R2' : [r2]
    }

metrics_df = pd.DataFrame(data=metrics)

# compare the values
metrics_df

Unnamed: 0,Model,MSE,RMSE,MAE,R2
0,First,0.029719,0.172392,0.147874,0.12904


# **Membuat Input**

In [25]:
#wpm_average = 75
#average_accuracy = 1

input_data = np.array([[75, 1, 20, 0]])
prediction = model.predict(input_data)
scalar = MinMaxScaler()
scalar.fit(average_second)
denormalized_prediction = scalar.inverse_transform(prediction)

print('Estimasi kecepatan waktu : ', denormalized_prediction/60)

Estimasi kecepatan waktu :  [[13.99948063]]


# **Save Model**

In [26]:
import pickle

filename = 'estimasi_waktu_mr.sav'
pickle.dump(model,open(filename,'wb'))

with open('average_second.pickle', 'wb') as file:
    pickle.dump(average_second, file)