In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [None]:
# Load the data into a pandas DataFrame (a table-like data structure), see
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
df = pd.read_csv(
    "../avaps_codefest_data.csv",
    sep=";",  # the file is semicolon-separated
)

### Explorative Datenanalyse:
- Wie sehen unsere Daten aus?
- Wie sind sie strukturiert?
- Welche Datentypen haben wir?

In [None]:
# List the column labels of the loaded table.
df.columns

In [None]:
# Display the loaded table.
df

In [None]:
# Data type of every column.
df.dtypes

In [None]:
# Summary statistics of the columns.
df.describe()

In [None]:
# Concise overview: column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Scatter plot of the part weight over the cycle counter, using Matplotlib's
# object-oriented interface (Figure and Axes are created by `plt.subplots`).
x = df['cycle_counter']
y = df['weight']
fig, ax = plt.subplots(figsize=(15, 5), layout='constrained')
ax.scatter(x, y, label='weight')
# Axis labels and title in a single call.
ax.set(xlabel='cycle_counter', ylabel='weight', title="Cycle weights")
ax.legend();  # trailing ';' suppresses the Legend repr in the cell output


In [None]:
# One scatter panel per signal, each plotted against the cycle counter.
signal_columns = [
    'umschaltvolumen',
    'maximaler_spritzdruck',
    'umschaltspritzdruck',
    'werkzeugheizkreis_1',
    'weight',
]
fig, axs = plt.subplots(len(signal_columns), 1, layout='constrained', figsize=(15, 10))
for panel, column in zip(axs, signal_columns):
    panel.scatter(df['cycle_counter'], df[column])
    panel.set_title(column)

### Daten bereinigen
- Z. B. werden Zyklen, in denen ein Gewicht nahe null gemessen wurde, aus den Daten entfernt.
- Das verbessert das Machine-Learning-Modell.

In [None]:
# Keep only cycles (rows) whose measured weight is above 0.5. Rows with a NaN
# weight are dropped as well, because `NaN > 0.5` evaluates to False.
has_valid_weight = df['weight'] > 0.5
df = df[has_valid_weight]
df

In [None]:
# Same five panels as before the clean-up: one scatter plot per signal over the
# cycle counter, now drawn from the filtered DataFrame.
signal_columns = [
    'umschaltvolumen',
    'maximaler_spritzdruck',
    'umschaltspritzdruck',
    'werkzeugheizkreis_1',
    'weight',
]
fig, axs = plt.subplots(len(signal_columns), 1, layout='constrained', figsize=(15, 10))
for panel, column in zip(axs, signal_columns):
    panel.scatter(df['cycle_counter'], df[column])
    panel.set_title(column)

## Machine Learning (lineare Regression), um das Bauteilgewicht vorherzusagen

In [None]:
# Feature selection: the columns used as model inputs.
feature_columns = [
    'umschaltvolumen',
    'maximaler_spritzdruck',
    'umschaltspritzdruck',
    'werkzeugheizkreis_1',
]
X = df[feature_columns]
X

In [None]:
# Target variable. The double brackets keep it a one-column DataFrame
# instead of a Series.
Y = df[['weight']]
Y

In [None]:
# Split into train and test data: 33 % of the rows are held out for testing;
# the fixed random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

In [None]:
# Fit a linear regression on the training split. `fit` returns the estimator
# itself, so it can be chained; the bare `lm` displays the fitted model.
lm = LinearRegression().fit(X_train, y_train)
lm

In [None]:
# Predict the weight for the held-out test rows and display the result.
predictions = lm.predict(X_test)
predictions

In [None]:
# Coefficient of determination (R^2) of the model on the test split.
lm.score(X_test, y_test)

In [None]:
# Error metrics of the test-set predictions. The MSE is computed once and
# reused for the RMSE.
mae = metrics.mean_absolute_error(y_test, predictions)
mse = metrics.mean_squared_error(y_test, predictions)
print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'RMSE: {np.sqrt(mse)}')
print(f'R^2: {metrics.r2_score(y_test, predictions)}')

$R^2 \approx 1$: sehr gute Vorhersage; $R^2 \approx 0$ oder negativ: schlechte Vorhersage.

Wie machen wir es noch besser?

In [None]:
from sklearn.preprocessing import Normalizer, MinMaxScaler, RobustScaler

In [None]:
# RobustScaler centers every column on its median and scales it by the
# interquartile range. It does NOT turn the data into a normal distribution and
# does NOT map the values into the range 0..1.
#
# Features and target each get their own scaler instance. Re-fitting a single
# shared instance on Y would overwrite the statistics learned from X, so X could
# no longer be transformed or inverse-transformed consistently afterwards.
#
# NOTE(review): both scalers are fitted on all rows, i.e. before the train/test
# split, so test rows influence the scaling statistics. Fit on the training
# split only if a strict hold-out evaluation is required.
x_scaler = RobustScaler()
y_scaler = RobustScaler()
X_std = pd.DataFrame(x_scaler.fit_transform(X), columns=X.columns)
Y_std = pd.DataFrame(y_scaler.fit_transform(Y), columns=Y.columns)
# Backward-compatible alias: the shared scaler previously ended up fitted on Y.
std_scaler = y_scaler


In [None]:
# Unscaled features, for comparison with the scaled version.
X

In [None]:
# Scaled features.
X_std

In [None]:
# Scaled target.
Y_std

In [None]:
# Scatter plot of the scaled weight against its row position.
# A dedicated name is used instead of re-assigning `df`: overwriting the cleaned
# DataFrame would make every earlier cell that reads `df[...]` fail with a
# KeyError when it is re-run.
weight_scaled = Y_std.reset_index()  # moves the row index into an 'index' column
weight_scaled.plot.scatter(x='index', y='weight');

In [None]:
# Split the scaled data with the same test_size and random_state as the
# unscaled split above. This re-binds X_train / X_test / y_train / y_test.
X_train, X_test, y_train, y_test = train_test_split(X_std, Y_std, test_size=0.33, random_state=42)

In [None]:
# Refit the linear model on the scaled training split and evaluate it on the
# scaled test split.
lm = LinearRegression()
lm.fit(X_train, y_train)
predictions = lm.predict(X_test)

# R^2 is reported via `metrics.r2_score` below. A bare `lm.score(...)` in the
# middle of a cell computes the same value but neither displays nor stores it,
# so it is not called here.
# NOTE: MAE / MSE / RMSE are expressed in units of the scaled weight, so they
# are not directly comparable with the errors of the unscaled model; R^2 is
# unaffected by the scaling of the target.
print(f'MAE: {metrics.mean_absolute_error(y_test, predictions)}')
print(f'MSE: {metrics.mean_squared_error(y_test, predictions)}')
print(f'RMSE: {np.sqrt(metrics.mean_squared_error(y_test, predictions))}')
print(f'R^2: {metrics.r2_score(y_test, predictions)}')