In [11]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import joblib
import warnings

warnings.filterwarnings("ignore")


In [12]:
# Read the training table, using its "id" column as the row index, then print
# the (rows, columns) shape as a quick sanity check on what was loaded.
train_df = pd.read_csv("data/train.csv", index_col="id")
print(train_df.shape)


(26570, 25)


In [13]:
# Integer codes for the categorical columns. `Series.map` turns any value that
# is absent from its dict into NaN, so an unlisted category is silently filled
# with the column's most frequent code by the imputer below.
# NOTE(review): confirm these dicts list every category present in the data.
CATEGORY_CODES = {
    "product_code": {letter: code for code, letter in enumerate("ABCDEFGHI")},
    "attribute_0": {"material_5": 0, "material_8": 1},
    "attribute_1": {
        "material_5": 0,
        "material_6": 1,
        "material_7": 2,
        "material_8": 3,
    },
}

for column, codes in CATEGORY_CODES.items():
    # Encode only while the column still holds raw labels. This cell overwrites
    # `train_df`, so on a re-run the column is already numeric; mapping it again
    # through string keys would yield an all-NaN column and break the
    # imputation step below.
    if not pd.api.types.is_numeric_dtype(train_df[column]):
        train_df[column] = train_df[column].map(codes)

# Replace missing entries with each column's most frequent value, then wrap the
# imputer's output back into a DataFrame carrying the original labels and index.
# NOTE(review): the imputer is fitted on every column of `train_df`, which still
# includes the "failure" target at this point.
imputer = SimpleImputer(strategy="most_frequent")
train_df = pd.DataFrame(
    imputer.fit_transform(train_df),
    columns=train_df.columns,
    index=train_df.index,
)


In [14]:
# Separate the label from the predictors: `target` is the "failure" column and
# `X` is every other column of `train_df`.
target = train_df["failure"]
X = train_df.drop(columns="failure")


In [15]:
# Project the features onto their first two principal components.
# `random_state` is pinned because PCA's default svd_solver="auto" may select
# the randomized solver, whose output otherwise varies from run to run.
# Note: `X` is overwritten with the projected array, so re-run the cell that
# builds `X` from `train_df` before re-running this one.
pca = PCA(n_components=2, random_state=0)
X = pca.fit_transform(X)


In [16]:
# Fit a linear regression (with intercept) of `target` on `X` and write the
# fitted estimator to the file "LR_model" with joblib. The dump call stays the
# last expression so the cell displays the list of written file names.
# NOTE(review): only the regressor is persisted in this cell; confirm that the
# fitted `imputer` and `pca` are saved elsewhere if they are needed to prepare
# inputs at prediction time.
model = LinearRegression(fit_intercept=True).fit(X, target)
joblib.dump(model, "LR_model")


['LR_model']