In [1]:
import pandas as pd  
import sklearn.preprocessing as skp
from sklearn.model_selection import train_test_split  
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score

In [2]:
def preprocessing(df):
    df= df.dropna()
    nonnumericcols = df.select_dtypes(include = "object").columns
    print (nonnumericcols)
    le = skp.LabelEncoder()
    for textcol in nonnumericcols:
        le.fit(df[textcol])
        df[textcol] = le.transform(df[textcol])
    return df

In [3]:
def split(df,testratio,opcolumn):
    Y = df[opcolumn]
    X =df.drop(opcolumn,axis=1)
    X_train,X_test,Y_train,Y_test =train_test_split(X,Y,test_size=testratio,random_state = 0)
    return (X_train,X_test,Y_train,Y_test)

In [4]:
def model(X_train,X_test,Y_train,Y_test,n_estimators):
    model = RandomForestClassifier(n_estimators)
    model.fit(X_train,Y_train)
    return model

In [5]:
def validate(X_test,Y_test, model):
    y_pred = model.predict(X_test)
    print(confusion_matrix(y_pred,Y_test))
    print(classification_report(y_pred,Y_test))
    print("Accuracy",accuracy_score(Y_test,y_pred))

In [6]:
dataset = pd.read_csv("imports-85.data",header=None) #data
dataset.columns=['symboling', #columns
'normalized-losses',
'make',
'fuel-type',
'aspiration',
'num-of-doors',
'body-style',
'drive-wheels',
'engine-location',
'wheel-base',
'length',
'width',
'height',
'curb-weight',
'engine-type',
'num-of-cylinders',
'engine-size',
'fuel-system',
'bore',
'stroke',
'compression-ratio',
'horsepower',
'peak-rpm',
'city-mpg',
'highway-mpg',
'price']


In [7]:
dataset = preprocessing(dataset)
X_train,X_test,Y_train,Y_test =split(dataset,0.20,'symboling')
model = model(X_train,X_test,Y_train,Y_test,20)
validate(X_test,Y_test,model)

Index(['normalized-losses', 'make', 'fuel-type', 'aspiration', 'num-of-doors',
       'body-style', 'drive-wheels', 'engine-location', 'engine-type',
       'num-of-cylinders', 'fuel-system', 'bore', 'stroke', 'horsepower',
       'peak-rpm', 'price'],
      dtype='object')
[[ 2  0  0  0  0]
 [ 0 11  2  1  0]
 [ 0  1  7  2  1]
 [ 0  0  0  6  0]
 [ 0  0  1  1  6]]
              precision    recall  f1-score   support

          -1       1.00      1.00      1.00         2
           0       0.92      0.79      0.85        14
           1       0.70      0.64      0.67        11
           2       0.60      1.00      0.75         6
           3       0.86      0.75      0.80         8

    accuracy                           0.78        41
   macro avg       0.81      0.83      0.81        41
weighted avg       0.80      0.78      0.78        41

Accuracy 0.7804878048780488
