In [10]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import mean_squared_error

In [55]:
class model:
    def __init__(self):
        self.forest_cpu = RandomForestRegressor(n_estimators=21, random_state=1)
        self.forest_ram = RandomForestRegressor(n_estimators=21, random_state=1)
    
    def train(self, df):
        Y_cpu = df['cpu'].values.astype(float)
        Y_ram = df['ram'].values.astype(float)
        
        X = df.iloc[:, [1,2,3]].values.astype(float)
        
        X_cpu = self.compose_cpu(X)
        X_ram = self.compose_ram(X)
        
        self.forest_cpu.fit(X_cpu, Y_cpu)
        self.forest_ram.fit(X_ram, Y_ram)
        
        # scoring
        rkf = RepeatedKFold(n_splits = 3, n_repeats = 5, random_state=1)
        # cpu score
        cpu_rmse = []
        for train_index, test_index in rkf.split(X_cpu):
            X_train, X_test = X_cpu[train_index], X_cpu[test_index]
            Y_train, Y_test = Y_cpu[train_index], Y_cpu[test_index]
            
            
            forest_cpu = RandomForestRegressor(n_estimators=21, random_state=1)
            forest_cpu.fit(X_train, Y_train)
            
            R = mean_squared_error(Y_test, forest_cpu.predict(X_test))
            cpu_rmse.append(np.sqrt(R))
        
        # ram score
        ram_rmse = []
        for train_index, test_index in rkf.split(X_ram):
            X_train, X_test = X_ram[train_index], X_ram[test_index]
            Y_train, Y_test = Y_ram[train_index], Y_ram[test_index]
            
            forest_ram = RandomForestRegressor(n_estimators=21, random_state=1)
            forest_ram.fit(X_train, Y_train)
            
            R = mean_squared_error(Y_test, forest_ram.predict(X_test))
            ram_rmse.append(np.sqrt(R))
            
        forest_cpu, forest_ram = None, None
        
        return np.round(np.mean(cpu_rmse), 1), np.round(np.mean(ram_rmse), 1)
    
    def check(func):
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except NotFittedError as e:
                print('Model is not yet fitted!')
            except:
                print('The model was trained on another data!')
        return wrapper
        
    @check
    def predict(self, df):
        X = df.iloc[:, [1,2,3]].values.astype(float)
        
        cpu = self.forest_cpu.predict(self.compose_cpu(X))
        ram = self.forest_ram.predict(self.compose_ram(X))
        
        cpu = np.clip(cpu, 0, None)
        ram = np.clip(ram, 0, None)
        
        return cpu, ram
    
    @check
    def predict_cpu(self, df):
        X = df.iloc[:, [1,2,3]].values.astype(float)
        return np.clip(self.forest_cpu.predict(self.compose_cpu(X)), 0, None)
    
    @check
    def predict_ram(self, df):
        X = df.iloc[:, [1,2,3]].values.astype(float)
        return np.clip(self.forest_ram.predict(self.compose_ram(X)), 0, None)
        
    def compose_cpu(self, x):
        return x
        
    def compose_ram(self, x):
        x = np.hstack((x, np.log(x+1)))
        return x

In [56]:
m = model()

In [57]:
train_df = pd.read_excel('train.xlsx', index_col=0)
test_df  = pd.read_excel('test.xlsx',  index_col=0)

In [58]:
m.train(train_df)

(894.74, 25868.18)

In [7]:
cpu, ram = m.predict(pd.read_excel('test.xlsx'))

In [8]:
cpu

array([1423.47619048,    7.85714286, 1051.76190476, 1423.47619048,
       1625.66666667,    9.23809524, 3965.80952381,    9.95238095,
       1609.28571429, 1423.47619048, 1423.47619048, 1221.9047619 ,
        400.19047619, 1171.95238095, 1219.95238095, 1285.52380952,
       1763.42857143,    9.95238095,  193.14285714,    9.95238095,
       1257.76190476, 1257.76190476,  235.47619048])