### Problem Statement

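Predict the module efficiency (%) of a solar panel from its electrical ratings: max power (Pmax), max-power voltage (Vmp), max-power current (Imp), open-circuit voltage (Voc), and short-circuit current (Isc).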

### Data Collection

In [1]:
# Report the version of the interpreter that is actually running this kernel.
# A shell call to `python --version` asks whichever `python` is first on PATH,
# which is not guaranteed to be the kernel's interpreter.
import platform

python_version_line = f"Python {platform.python_version()}"
print(python_version_line)

Python 3.8.10


In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error,r2_score

In [3]:
# Load the solar-module dataset; the path is relative to the notebook's
# working directory. Preview the first two rows to check the columns.
df = pd.read_csv('solar_data.csv')
df.head(2)

Unnamed: 0,module type,type,"Max Power (Pmax), Wp",Max Power Volt (Vmp),Max Power Curr (Imp),Open Circuit Volt (Voc),Short Circuit Curr (Isc),Module Efficiency (%)
0,JKM590N-78HL4-BDV,STC,590,44.91,13.14,54.76,13.71,21.11
1,JKM590N-78HL4-BDV,NOCT,444,41.89,10.59,52.02,11.07,21.11


### EDA

In [4]:
# EDA

# Inspect dtypes and non-null counts. 'Max Power (Pmax), Wp' is the only
# int64 column; it is cast to float in the next cell so that every numeric
# column shares one dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   module type               10 non-null     object 
 1   type                      10 non-null     object 
 2   Max Power (Pmax), Wp      10 non-null     int64  
 3   Max Power Volt (Vmp)      10 non-null     float64
 4   Max Power Curr (Imp)      10 non-null     float64
 5   Open Circuit Volt (Voc)   10 non-null     float64
 6   Short Circuit Curr (Isc)  10 non-null     float64
 7   Module Efficiency (%)     10 non-null     float64
dtypes: float64(5), int64(1), object(2)
memory usage: 768.0+ bytes


In [5]:
# Cast the integer power rating to float so all numeric columns are float64.
df = df.astype({'Max Power (Pmax), Wp': float})

In [6]:
# Re-check the dtypes after the cast: 'Max Power (Pmax), Wp' should now be float64.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   module type               10 non-null     object 
 1   type                      10 non-null     object 
 2   Max Power (Pmax), Wp      10 non-null     float64
 3   Max Power Volt (Vmp)      10 non-null     float64
 4   Max Power Curr (Imp)      10 non-null     float64
 5   Open Circuit Volt (Voc)   10 non-null     float64
 6   Short Circuit Curr (Isc)  10 non-null     float64
 7   Module Efficiency (%)     10 non-null     float64
dtypes: float64(6), object(2)
memory usage: 768.0+ bytes


In [7]:
# Count missing values per column.
df.isna().sum()

module type                 0
type                        0
Max Power (Pmax), Wp        0
Max Power Volt (Vmp)        0
Max Power Curr (Imp)        0
Open Circuit Volt (Voc)     0
Short Circuit Curr (Isc)    0
Module Efficiency (%)       0
dtype: int64

### Model Training

In [8]:
# Separate the regression target from the predictors.
# The two text columns identify the module / test condition and are not used
# as features; the target column is removed from the feature matrix as well.
target_column = 'Module Efficiency (%)'
identifier_columns = ['module type', 'type']

y = df[target_column]
x = df.drop(columns=identifier_columns + [target_column])

In [9]:
# Hold out 25% of the rows for testing; random_state pins the shuffle so the
# split is reproducible. The last expression shows the shape of each part.
split_parts = train_test_split(x, y, test_size=0.25, random_state=2)
x_train, x_test, y_train, y_test = split_parts
tuple(part.shape for part in split_parts)

((7, 5), (3, 5), (7,), (3,))

In [10]:
# Fit an ordinary least-squares linear regression on the training rows.
# The fitted line is described by its slopes (m, `coef_`) and its
# intercept (c, `intercept_`), both inspected in the following cells.
sol_model = LinearRegression()
sol_model.fit(X=x_train, y=y_train)

In [11]:
# Fitted slope coefficients (the "m" terms of the best-fit line), one per feature column.
sol_model.coef_

array([-0.03112672,  0.01033721, -2.24857195,  2.88688976,  0.89241428])

In [12]:
# Fitted intercept (the "c" term of the best-fit line).
sol_model.intercept_

-101.76705799228885

### Model Evaluation

In [13]:
# For Testing Data
# Predict efficiency for the held-out test rows; these predictions feed the
# test-set metrics computed in the next cell.

y_pred_test = sol_model.predict(x_test)
y_pred_test

array([21.45193651, 21.08836024, 21.44075844])

In [14]:
# Test-set metrics. MSE / RMSE / MAE are errors (lower is better); R^2 is the
# coefficient of determination, a score where higher is better, so it is
# labelled as a score rather than an error.
# NOTE: the test split holds only 3 rows, so these numbers are a rough estimate.
mse = mean_squared_error(y_test, y_pred_test)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred_test)
r2 = r2_score(y_test, y_pred_test)

print(f"Mean Squared Error : {mse}")
print(f"Root Mean Squared Error : {rmse}")
print(f"Mean Absolute Error : {mae}")
print(f"R Squared Score : {r2}")

Mean Squared Error : 0.00030117886675340026
Root Mean Squared Error : 0.017354505661452885
Mean Absolute Error : 0.01631493603811407
R Squared Error : 0.9889362865274262


In [15]:
# For Training Data
# Predict on the rows the model was fitted on, so the training metrics in the
# next cell can be compared with the test metrics above.

y_pred_train = sol_model.predict(x_train)
y_pred_train

array([21.10727114, 21.64018378, 21.29403827, 21.29004339, 21.63870364,
       21.81976773, 21.81999205])

In [16]:
# Training-set metrics, computed the same way as the test-set metrics.
# MSE / RMSE / MAE are errors (lower is better); R^2 is the coefficient of
# determination, a score where higher is better, so it is labelled as a score.
# NOTE: this cell rebinds mse / rmse / mae / r2, replacing the test-set values.
mse = mean_squared_error(y_train, y_pred_train)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_train, y_pred_train)
r2 = r2_score(y_train, y_pred_train)

print(f"Mean Squared Error : {mse}")
print(f"Root Mean Squared Error : {rmse}")
print(f"Mean Absolute Error : {mae}")
print(f"R Squared Score : {r2}")

Mean Squared Error : 3.64636321516612e-06
Root Mean Squared Error : 0.0019095452901583979
Mean Absolute Error : 0.0012186971456742413
R Squared Error : 0.9999471072239363


### User Test Function

In [17]:
# Show the first training row transposed, to read off the feature names and
# their order before building a manual input for prediction.
x_train.head(1).T

Unnamed: 0,0
"Max Power (Pmax), Wp",590.0
Max Power Volt (Vmp),44.91
Max Power Curr (Imp),13.14
Open Circuit Volt (Voc),54.76
Short Circuit Curr (Isc),13.71


In [None]:
        # NOTE(review): this cell is an excerpt of a method body -- it reads
        # `self.data` / `self.model` and ends with `return`, but no enclosing
        # `def` or class is defined in this notebook, so it cannot run as a
        # standalone cell (it was never executed). Presumably it belongs to the
        # serving code that loads the pickled model -- TODO confirm, then move
        # it there or wrap it in a function.

        # Read the five inputs from the request data and coerce them to float.
        Pmax = float(self.data['Pmax'])
        Vmp = float(self.data['Vmp'])
        Imp = float(self.data['Imp'])
        Voc = float(self.data['Voc'])
        Isc = float(self.data['Isc'])

        # ndmin=2 yields a (1, 5) array: one sample with five features, in the
        # same order as the training columns.
        input1 = np.array([Pmax,Vmp,Imp,Voc,Isc], ndmin=2)

        # predict() returns one value per sample; take the single prediction.
        prediction = self.model.predict(input1)[0]
        print(prediction)
        return prediction

In [19]:
# Manual single-sample prediction. The values are those of the training row
# displayed in the previous cell, so the result can be compared with the
# first training prediction.
# (numpy and pandas are already imported in the imports cell at the top.)
Pmax = 590.00
Vmp = 44.91
Imp = 13.14
Voc = 54.76
Isc = 13.71

# ndmin=2 yields a (1, 5) array: one sample, five features in training order.
input1 = np.array([Pmax,Vmp,Imp,Voc,Isc], ndmin=2)

# The model was fitted on a DataFrame with named columns. Predicting on a
# bare ndarray makes scikit-learn warn that X has no valid feature names and
# skips its name/order consistency check, so label the sample with the
# training column names before predicting.
input_df = pd.DataFrame(input1, columns=x.columns)

solar_pred = sol_model.predict(input_df)[0]
solar_pred



21.10727113744902

### Pickle File Creation

In [20]:
import pickle

# Serialize the fitted model to 'model.pkl' in the working directory.
# Only unpickle this file from a trusted source: loading a pickle can execute
# arbitrary code.
with open('model.pkl','wb') as file:
    pickle.dump(sol_model,file)

In [21]:
# Save the feature names, in training order, next to the model so that code
# loading the model can rebuild an input row with the same column layout.
import json

columns_list = x.columns.tolist()
dict1 = dict(columns=columns_list)

with open('features.json', 'w') as file:
    json.dump(dict1, file)