In [79]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import seaborn as sns
import os
from sklearn.metrics import mean_squared_error,  r2_score

In [63]:
# Walk the Kaggle input mount and print the full path of every file found.
for folder, _, names_in_folder in os.walk('/kaggle/input'):
    for name in names_in_folder:
        full_path = os.path.join(folder, name)
        print(full_path)

In [5]:
# Read the battery dataset CSV from the Kaggle input mount into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/battery-remaining-useful-life-rul/Battery_RUL.csv",
)

In [6]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline

In [7]:
# Plot pairwise relationships between the columns of the loaded data
# as a quick visual overview before modelling.
sns.pairplot(data=df)

In [16]:
# generate device ids
# A row whose Cycle_Index equals 1.0 is treated as the start of a new device;
# the running count of those start rows then serves as the device id.
starts_new_device = df["Cycle_Index"].eq(1.0)
df["split"] = starts_new_device
df["device_id"] = starts_new_device.cumsum()

In [84]:
def calculate_metrics(model_name, y_test, y_test_pred):
    """Print and return the R^2 score and RMSE of a model's predictions.

    Parameters
    ----------
    model_name : str
        Label printed in the report header.
    y_test : array-like
        Ground-truth target values.
    y_test_pred : array-like
        Predicted target values, aligned with ``y_test``.

    Returns
    -------
    tuple
        ``(r2, rmse)`` where ``r2`` is the coefficient of determination and
        ``rmse`` is the root mean squared error.
    """
    r2 = r2_score(y_test, y_test_pred)
    # Take the square root explicitly instead of passing `squared=False`:
    # that flag was deprecated in scikit-learn 1.4 and removed in 1.6, so the
    # explicit form works on both old and new releases. For the
    # single-output targets used in this notebook the value is identical.
    rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))

    print(f'Model: {model_name}')
    print(f'r2 score: {r2}')
    print(f'Root Mean Squared Error: {rmse}')
    print("")

    return r2, rmse

In [87]:
# train test split
# The split is done per device (not per row) so that every cycle of a given
# device ends up on exactly one side of the split.
# The shuffle uses a seeded generator so the split, and therefore the metrics
# printed below, are identical on every Restart & Run All.
split_rng = np.random.default_rng(0)
devices = split_rng.permutation(df["device_id"].unique())

num_train_devices = 10
if num_train_devices >= len(devices):
    # Without this guard the test frame would be empty and the failure would
    # only surface later, inside predict(), with a less helpful message.
    raise ValueError(
        f"num_train_devices={num_train_devices} leaves no devices for testing "
        f"(only {len(devices)} devices found)"
    )
train_devices = devices[:num_train_devices]
test_devices = devices[num_train_devices:]

train = df[df['device_id'].isin(train_devices)]
test = df[df['device_id'].isin(test_devices)]

# drop unnecessary columns (bookkeeping columns that are not used as features)
train = train.drop(["device_id", "Cycle_Index", "split"], axis=1)
test = test.drop(["device_id", "Cycle_Index", "split"], axis=1)

# split into features and labels X, y
# NOTE(review): this takes the last remaining column as the label and all
# others as features; confirm the target is the final column of the CSV.
X_train = train.iloc[:,:-1]
y_train = train.iloc[:,-1]
X_test = test.iloc[:,:-1]
y_test = test.iloc[:,-1]

random_forest_model = RandomForestRegressor(n_estimators = 100, random_state = 0)
random_forest_model.fit(X_train, y_train)
y_test_pred_random_forest = random_forest_model.predict(X_test)

linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_test_pred_linear = linear_model.predict(X_test)

# Report held-out performance for both models.
calculate_metrics("random forest", y_test, y_test_pred_random_forest)
calculate_metrics("linear", y_test, y_test_pred_linear)