In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from statsmodels.stats.outliers_influence import variance_inflation_factor
from scipy.stats import shapiro, kstest, normaltest


import seaborn as sns
import matplotlib.pyplot as plt


In [2]:
# Load the fish measurements into a DataFrame.
# A forward slash is used as the path separator so the path resolves on every
# OS and does not rely on '\F', which is an invalid string escape sequence.
df = pd.read_csv('CSV_files/Fish.csv')
df

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.5200,4.0200
1,Bream,290.0,24.0,26.3,31.2,12.4800,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.7300,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.4440,5.1340
...,...,...,...,...,...,...,...
154,Smelt,12.2,11.5,12.2,13.4,2.0904,1.3936
155,Smelt,13.4,11.7,12.4,13.5,2.4300,1.2690
156,Smelt,12.2,12.1,13.0,13.8,2.2770,1.2558
157,Smelt,19.7,13.2,14.3,15.2,2.8728,2.0672


In [3]:
# Show the column labels of the loaded frame.
df.columns

Index(['Species', 'Weight', 'Length1', 'Length2', 'Length3', 'Height',
       'Width'],
      dtype='object')

In [4]:
# Print a summary of the frame: index range, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159 entries, 0 to 158
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Species  159 non-null    object 
 1   Weight   159 non-null    float64
 2   Length1  159 non-null    float64
 3   Length2  159 non-null    float64
 4   Length3  159 non-null    float64
 5   Height   159 non-null    float64
 6   Width    159 non-null    float64
dtypes: float64(6), object(1)
memory usage: 8.8+ KB


# Species

In [5]:
# Count the rows per species and show the counts as a plain dict.
df['Species'].value_counts().to_dict()

{'Perch': 56,
 'Bream': 35,
 'Roach': 20,
 'Pike': 17,
 'Smelt': 14,
 'Parkki': 11,
 'Whitefish': 6}

In [6]:
# Integer code assigned to each species name.
species_codes = {
    'Perch': 0,
    'Bream': 1,
    'Roach': 2,
    'Pike': 3,
    'Smelt': 4,
    'Parkki': 5,
    'Whitefish': 6,
}

# Assign the encoded column back to the frame instead of calling
# replace(..., inplace=True) on the df['Species'] selection: with pandas
# Copy-on-Write the in-place call only updates a temporary object and leaves
# df unchanged. Re-running this cell is harmless because already-encoded
# integers do not match any key in the mapping.
df['Species'] = df['Species'].replace(species_codes)

In [7]:
# Target is the fish weight; the features are every remaining column.
y = df['Weight']
x = df.drop(columns='Weight')

In [8]:
def report_metrics(title, y_true, y_pred):
    """Print and return the regression metrics for one data split.

    Parameters
    ----------
    title : str
        Heading printed above the metrics (e.g. "Training Evaluation").
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Model predictions for the same rows.

    Returns
    -------
    tuple
        (mse, rmse, mae, r_squared) as computed by scikit-learn / numpy.
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r_squared = r2_score(y_true, y_pred)

    print("*"*80)
    print(title + "\n")
    print("mse:", mse)
    print("rmse:", rmse)
    print('mae :', mae)
    print('r_squared :', r_squared)
    return mse, rmse, mae, r_squared


# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 42)

# Fit on plain arrays (.values), so later predictions can also be made from plain arrays.
lin_reg = LinearRegression()
lin_reg.fit(x_train.values, y_train.values)

# Evaluate on the rows the model was fitted on.
y_pred_train = lin_reg.predict(x_train.values)
report_metrics("Training Evaluation", y_train, y_pred_train)

# Evaluate on the held-out rows; the metric variables keep the test-split values.
y_pred_test = lin_reg.predict(x_test.values)
mse, rmse, mae, r_squared = report_metrics("Testing Evaluation", y_test, y_pred_test)

********************************************************************************
Training Evaluation

mse: 14392.739597554897
rmse: 119.96974450900068
mae : 89.0314053596188
r_squared : 0.8859687170863504
********************************************************************************
Testing Evaluation

mse: 14743.484853333823
rmse: 121.4227526180074
mae : 95.01543086287558
r_squared : 0.8869255714602403


# Project_data

In [9]:
# Metadata needed at prediction time: the species -> integer code mapping
# (codes follow the listed order, starting at 0) and the feature column order.
project_data = {
    'Species': {
        name: code
        for code, name in enumerate(
            ['Perch', 'Bream', 'Roach', 'Pike', 'Smelt', 'Parkki', 'Whitefish']
        )
    },
    'Columns': list(x.columns),
}

project_data

{'Species': {'Perch': 0,
  'Bream': 1,
  'Roach': 2,
  'Pike': 3,
  'Smelt': 4,
  'Parkki': 5,
  'Whitefish': 6},
 'Columns': ['Species', 'Length1', 'Length2', 'Length3', 'Height', 'Width']}

# Test prediction with sample user input

In [10]:
# Sample measurements for a single fish, as a user would supply them.
Species = 'Bream'
Length1 = 25
Length2 = 27
Length3 = 28
Height = 12.2
Width = 4.50

# Look up the integer code under its own name so that `Species` keeps holding
# the species name instead of silently changing type from str to int.
species_code = project_data['Species'][Species]

# Input values keyed by feature name.
user_input = {
    'Species': species_code,
    'Length1': Length1,
    'Length2': Length2,
    'Length3': Length3,
    'Height': Height,
    'Width': Width,
}

# Build the feature vector in the column order recorded in project_data
# (taken from the training features) rather than by hard-coded positions,
# so the values stay aligned with the model if that order ever changes.
test_array = np.array(
    [user_input[column] for column in project_data['Columns']], dtype=float
)

# predict() expects a 2-D input, hence the single-row list; round to 3 decimals.
predicted_weight = np.around(lin_reg.predict([test_array]), 3)[0]
print(predicted_weight)

498.352


# MODEL_FILE

In [11]:
import pickle
import json


# Serialize the fitted regression model to disk with pickle.
with open('Linear_reg.pkl', mode='wb') as model_file:
    pickle.dump(lin_reg, model_file)

# PROJECT_DATA_FILE

In [12]:
# Write the species encoding and the feature column order to a JSON file.
with open('Fish_project_data.json', mode='w') as metadata_file:
    json.dump(project_data, metadata_file)

In [1]:

dict1 = {"data_columns": ["total_sqft", "bath", "bhk", "1st block jayanagar", "1st phase jp nagar", "2nd phase judicial layout", "2nd stage nagarbhavi", "5th block hbr layout", "5th phase jp nagar", "6th phase jp nagar", "7th phase jp nagar", "8th phase jp nagar", "9th phase jp nagar", "aecs layout", "abbigere", "akshaya nagar", "ambalipura", "ambedkar nagar", "amruthahalli", "anandapura", "ananth nagar", "anekal", "anjanapura", "ardendale", "arekere", "attibele", "beml layout", "btm 2nd stage", "btm layout", "babusapalaya", "badavala nagar", "balagere", "banashankari", "banashankari stage ii", "banashankari stage iii", "banashankari stage v", "banashankari stage vi", "banaswadi", "banjara layout", "bannerghatta", "bannerghatta road", "basavangudi", "basaveshwara nagar", "battarahalli", "begur", "begur road", "bellandur", "benson town", "bharathi nagar", "bhoganhalli", "billekahalli", "binny pete", "bisuvanahalli", "bommanahalli", "bommasandra", "bommasandra industrial area", "bommenahalli", "brookefield", "budigere", "cv raman nagar", "chamrajpet", "chandapura", "channasandra", "chikka tirupathi", "chikkabanavar", "chikkalasandra", "choodasandra", "cooke town", "cox town", "cunningham road", "dasanapura", "dasarahalli", "devanahalli", "devarachikkanahalli", "dodda nekkundi", "doddaballapur", "doddakallasandra", "doddathoguru", "domlur", "dommasandra", "epip zone", "electronic city", "electronic city phase ii", "electronics city phase 1", "frazer town", "gm palaya", "garudachar palya", "giri nagar", "gollarapalya hosahalli", "gottigere", "green glen layout", "gubbalala", "gunjur", "hal 2nd stage", "hbr layout", "hrbr layout", "hsr layout", "haralur road", "harlur", "hebbal", "hebbal kempapura", "hegde nagar", "hennur", "hennur road", "hoodi", "horamavu agara", "horamavu banaswadi", "hormavu", "hosa road", "hosakerehalli", "hoskote", "hosur road", "hulimavu", "isro layout", "itpl", "iblur village", "indira nagar", "jp nagar", "jakkur", "jalahalli", "jalahalli east", 
"jigani", "judicial layout", "kr puram", "kadubeesanahalli", "kadugodi", "kaggadasapura", "kaggalipura", "kaikondrahalli", "kalena agrahara", "kalyan nagar", "kambipura", "kammanahalli", "kammasandra", "kanakapura", "kanakpura road", "kannamangala", "karuna nagar", "kasavanhalli", "kasturi nagar", "kathriguppe", "kaval byrasandra", "kenchenahalli", "kengeri", "kengeri satellite town", "kereguddadahalli", "kodichikkanahalli", "kodigehaali", "kodigehalli", "kodihalli", "kogilu", "konanakunte", "koramangala", "kothannur", "kothanur", "kudlu", "kudlu gate", "kumaraswami layout", "kundalahalli", "lb shastri nagar", "laggere", "lakshminarayana pura", "lingadheeranahalli", "magadi road", "mahadevpura", "mahalakshmi layout", "mallasandra", "malleshpalya", "malleshwaram", "marathahalli", "margondanahalli", "marsur", "mico layout", "munnekollal", "murugeshpalya", "mysore road", "ngr layout", "nri layout", "nagarbhavi", "nagasandra", "nagavara", "nagavarapalya", "narayanapura", "neeladri nagar", "nehru nagar", "ombr layout", "old airport road", "old madras road", "padmanabhanagar", "pai layout", "panathur", "parappana agrahara", "pattandur agrahara", "poorna pragna layout", "prithvi layout", "r.t. nagar", "rachenahalli", "raja rajeshwari nagar", "rajaji nagar", "rajiv nagar", "ramagondanahalli", "ramamurthy nagar", "rayasandra", "sahakara nagar", "sanjay nagar", "sarakki nagar", "sarjapur", "sarjapur  road", "sarjapura - attibele road", "sector 2 hsr layout", "sector 7 hsr layout", "seegehalli", "shampura", "shivaji nagar", "singasandra", "somasundara palya", "sompura", "sonnenahalli", "subramanyapura", "sultan palaya", "tc palaya", "talaghattapura", "thanisandra", "thigalarapalya", "thubarahalli", "tindlu", "tumkur road", "ulsoor", "uttarahalli", "varthur", "varthur road", "vasanthapura", "vidyaranyapura", "vijayanagar", "vishveshwarya layout", "vishwapriya layout", "vittasandra", "whitefield", "yelachenahalli", "yelahanka", "yelahanka new town", "yelenahalli", "yeshwanthpu

['1st block jayanagar',
 '1st phase jp nagar',
 '2nd phase judicial layout',
 '2nd stage nagarbhavi',
 '5th block hbr layout',
 '5th phase jp nagar',
 '6th phase jp nagar',
 '7th phase jp nagar',
 '8th phase jp nagar',
 '9th phase jp nagar',
 'aecs layout',
 'abbigere',
 'akshaya nagar',
 'ambalipura',
 'ambedkar nagar',
 'amruthahalli',
 'anandapura',
 'ananth nagar',
 'anekal',
 'anjanapura',
 'ardendale',
 'arekere',
 'attibele',
 'beml layout',
 'btm 2nd stage',
 'btm layout',
 'babusapalaya',
 'badavala nagar',
 'balagere',
 'banashankari',
 'banashankari stage ii',
 'banashankari stage iii',
 'banashankari stage v',
 'banashankari stage vi',
 'banaswadi',
 'banjara layout',
 'bannerghatta',
 'bannerghatta road',
 'basavangudi',
 'basaveshwara nagar',
 'battarahalli',
 'begur',
 'begur road',
 'bellandur',
 'benson town',
 'bharathi nagar',
 'bhoganhalli',
 'billekahalli',
 'binny pete',
 'bisuvanahalli',
 'bommanahalli',
 'bommasandra',
 'bommasandra industrial area',
 'bommenaha

In [5]:
# Literal with a species -> integer code mapping and a list of feature column names.
dict1 = {
    "Species": {
        "Perch": 0,
        "Bream": 1,
        "Roach": 2,
        "Pike": 3,
        "Smelt": 4,
        "Parkki": 5,
        "Whitefish": 6,
    },
    "Columns": ["Species", "Length1", "Length2", "Length3", "Height", "Width"],
}

# .keys() gives a dict_keys view over the species names, not a list;
# the cell's displayed result is the type of that view.
var = dict1['Species'].keys()
type(var)



dict_keys