In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# np.random.seed() returns None, so keep the seed value in its own variable
# and then seed NumPy's global RNG with it.
seed = 100
np.random.seed(seed)

In [2]:
# Read the CSV into a DataFrame and preview its first five rows.
df = pd.read_csv('solar_data.csv')
df.head(n=5)

Unnamed: 0,S1(Amp),S2(Amp),S1(Volt),S2(Volt),Light(kiloLux),Temp(degC),Weather,State
0,6.4,6.8,108,109,107.0,34,Sunny,Normal
1,7.1,6.5,110,107,108.0,38,Sunny,Normal
2,6.2,6.5,106,107,107.0,36,Sunny,Normal
3,7.4,6.1,108,109,104.0,35,Sunny,Normal
4,6.5,6.9,109,109,109.0,38,Sunny,Normal


In [3]:
# Shuffle every row (frac=1.0 samples the whole frame without replacement).
df = df.sample(frac=1.0)
df.head(n=5)

Unnamed: 0,S1(Amp),S2(Amp),S1(Volt),S2(Volt),Light(kiloLux),Temp(degC),Weather,State
1273,1.2,5.2,75,101,102.0,37,Cloudy,Line-line
1380,0.9,5.6,72,104,102.0,34,Cloudy,Line-line
356,5.5,5.3,102,106,93.0,24,Cloudy,Normal
1920,0.5,0.4,97,90,10.0,5,Cloudy,Normal
561,0.0,5.2,0,110,91.0,21,Sunny,Open


In [4]:
# Renumber the shuffled rows 0..n-1. drop=True discards the old index
# directly instead of materialising it as an 'index' column and then
# dropping that column in place.
df = df.reset_index(drop=True)
df.head()

Unnamed: 0,S1(Amp),S2(Amp),S1(Volt),S2(Volt),Light(kiloLux),Temp(degC),Weather,State
0,1.2,5.2,75,101,102.0,37,Cloudy,Line-line
1,0.9,5.6,72,104,102.0,34,Cloudy,Line-line
2,5.5,5.3,102,106,93.0,24,Cloudy,Normal
3,0.5,0.4,97,90,10.0,5,Cloudy,Normal
4,0.0,5.2,0,110,91.0,21,Sunny,Open


In [5]:
# Count how many rows fall into each Weather category.
df['Weather'].value_counts()

Cloudy    1504
Sunny     1496
Name: Weather, dtype: int64

In [6]:
# Encode Weather as 1 for 'Sunny' and 0 for anything else.
# The already-encoded value 1 is also accepted as "Sunny" so that re-running
# this cell is a no-op rather than silently zeroing the whole column
# (after the first run no value equals the string 'Sunny' any more).
is_sunny = df['Weather'].isin(['Sunny', 1])
df['Weather'] = np.where(is_sunny, 1, 0)
df.head()

Unnamed: 0,S1(Amp),S2(Amp),S1(Volt),S2(Volt),Light(kiloLux),Temp(degC),Weather,State
0,1.2,5.2,75,101,102.0,37,0,Line-line
1,0.9,5.6,72,104,102.0,34,0,Line-line
2,5.5,5.3,102,106,93.0,24,0,Normal
3,0.5,0.4,97,90,10.0,5,0,Normal
4,0.0,5.2,0,110,91.0,21,1,Open


In [7]:
# Display the frame's dimensions as (rows, columns).
df.shape

(3000, 8)

In [8]:
# Positional split: every column except the last is a feature,
# the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [9]:
# Preview the first five rows of the feature matrix.
X.head(n=5)

Unnamed: 0,S1(Amp),S2(Amp),S1(Volt),S2(Volt),Light(kiloLux),Temp(degC),Weather
0,1.2,5.2,75,101,102.0,37,0
1,0.9,5.6,72,104,102.0,34,0
2,5.5,5.3,102,106,93.0,24,0
3,0.5,0.4,97,90,10.0,5,0
4,0.0,5.2,0,110,91.0,21,1


In [10]:
# Preview the first five target labels.
y.head(n=5)

0    Line-line
1    Line-line
2       Normal
3       Normal
4         Open
Name: State, dtype: object

In [11]:
# Hold out 30% of the rows for testing; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=0,
)
(X_train.shape, X_test.shape)

((2100, 7), (900, 7))

In [12]:
# Show the concrete type of the held-out feature set produced by train_test_split.
type(X_test)

pandas.core.frame.DataFrame

In [13]:
# Preview the first five rows of the held-out features.
X_test.head(n=5)

Unnamed: 0,S1(Amp),S2(Amp),S1(Volt),S2(Volt),Light(kiloLux),Temp(degC),Weather
311,5.8,5.9,105,107,97.0,28,0
1025,0.8,0.0,85,0,11.0,6,0
1587,0.6,0.0,86,0,12.0,3,0
2941,0.0,3.7,0,103,90.0,16,1
2980,3.9,4.1,87,84,87.0,15,1


In [14]:
# Write the held-out features to disk, omitting the row index.
X_test.to_csv(path_or_buf='test_data.csv', index=False)

In [15]:
# Fit a random forest on the training split.
# An explicit random_state makes the fitted model independent of how much of
# NumPy's global RNG state earlier cells consumed, so re-running this cell
# alone reproduces the same forest.
rf_model = RandomForestClassifier(random_state=100).fit(X_train, y_train)

In [16]:
# Predict labels for the held-out rows and show the first ten predictions.
y_pred = rf_model.predict(X_test)
y_pred[0:10]

array(['Normal', 'Open', 'Open', 'Open', 'Normal', 'Line-line', 'Normal',
       'Normal', 'Open', 'Line-line'], dtype=object)

In [17]:
# Print the first ten true labels for comparison with the predictions.
print(y_test.head(10))

311        Normal
1025         Open
1587         Open
2941         Open
2980       Normal
2078    Line-line
2281       Normal
720        Normal
1537         Open
2493    Line-line
Name: State, dtype: object


In [18]:
# classification_report returns a formatted string, so it is printed
# rather than left as the cell's last expression.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

   Line-line       1.00      1.00      1.00       304
      Normal       1.00      1.00      1.00       301
        Open       1.00      1.00      1.00       295

    accuracy                           1.00       900
   macro avg       1.00      1.00      1.00       900
weighted avg       1.00      1.00      1.00       900



In [19]:
# Predict for one hand-written observation.
# The model was fitted on a DataFrame, so the row is wrapped in a DataFrame
# carrying the training column names. This keeps the feature order explicit
# and avoids scikit-learn's "X does not have valid feature names" warning.
single_row = pd.DataFrame(
    [[1.2, 5.2, 75, 101, 102, 37, 0]],
    columns=X_train.columns,
)
rf_model.predict(single_row)

array(['Line-line'], dtype=object)

In [20]:
# Persist the fitted model. The context manager guarantees the file handle is
# flushed and closed even if pickling raises.
# NOTE: only unpickle 'model.pkl' from a trusted source — pickle.load can
# execute arbitrary code.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(rf_model, model_file)