In [0]:
import pandas as pd
import numpy as np
import sklearn

In [0]:
# Fetch the sample weather CSV from GitHub (requires network access) and display it.
data = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/Mounika-Kajjam/Datasets/master/weather_data.csv"
)
data

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


In [0]:
# The value -99999 in temperature/windspeed and the string "0" in event are
# swapped for NaN, per column, so pandas treats those cells as missing.
data = data.replace(
    to_replace={"temperature": -99999, "windspeed": -99999, "event": "0"},
    value=np.nan,
)
data

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,2.0,Sunny
6,1/6/2017,34.0,5.0,


**INTERPOLATION :**
-  It estimates each missing value from the neighbouring values in the same column (linear interpolation by default), rather than picking a random one.

In [0]:
# Linearly interpolate the gaps in the numeric columns only. Passing
# object-dtype columns (day, event) to interpolate() is deprecated in recent
# pandas, and they cannot be interpolated anyway, so they are copied unchanged.
numeric_cols = data.select_dtypes(include="number").columns
int_data = data.copy()
int_data[numeric_cols] = data[numeric_cols].interpolate()
int_data

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,30.0,7.0,Sunny
2,1/3/2017,28.0,7.0,Snow
3,1/4/2017,30.0,7.0,
4,1/5/2017,32.0,4.5,Rain
5,1/6/2017,31.0,2.0,Sunny
6,1/6/2017,34.0,5.0,


In [0]:

# Keep only the rows that have no missing value in any column.
dropd = data.dropna(axis="index", how="any")
dropd


Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
5,1/6/2017,31.0,2.0,Sunny


In [0]:
# Drop every column that contains at least one missing value.
dropd = data.dropna(axis="columns", how="any")
dropd

Unnamed: 0,day
0,1/1/2017
1,1/2/2017
2,1/3/2017
3,1/4/2017
4,1/5/2017
5,1/6/2017
6,1/6/2017


In [0]:
# how="all" removes a row only when every value in that row is missing;
# a row with at least one present value is kept.
dropd = data.dropna(axis="index", how="all")
dropd

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,2.0,Sunny
6,1/6/2017,34.0,5.0,


**HANDLING MISSING VALUES USING SCIKIT LEARN**

In [0]:
# SIMPLE IMPUTER

In [0]:
from sklearn.impute import SimpleImputer

# Both arguments are optional (np.nan and "mean" are the defaults); they are
# spelled out here only to make the chosen behaviour explicit.
imputer = SimpleImputer(strategy="mean", missing_values=np.nan)

In [0]:
# Imputation is a two-step process: fit() first, then transform().
# The "mean" strategy needs numeric input, so only the columns at positions
# 1 and 2 (temperature, windspeed) are passed in.
imputer.fit(X=data.iloc[:, 1:3])  # step 1: fit

SimpleImputer(add_indicator=False, copy=True, fill_value=None,
              missing_values=nan, strategy='mean', verbose=0)

In [0]:
# Step 2: transform the same two columns and write the result back into
# `data` (this overwrites the frame in place).
data.iloc[:, 1:3] = imputer.transform(X=data.iloc[:, 1:3])
data

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,31.4,7.0,Sunny
2,1/3/2017,28.0,5.4,Snow
3,1/4/2017,31.4,7.0,
4,1/5/2017,32.0,5.4,Rain
5,1/6/2017,31.0,2.0,Sunny
6,1/6/2017,34.0,5.0,


**ENCODING CATEGORICAL  COLUMNS**
+ Nominal (no ordering), e.g. city names
+ Ordinal (ordered), e.g. job designations
- Using pandas -> `get_dummies()`

In [0]:
# Fetch the home-prices CSV from GitHub (requires network access) and display it.
price = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/Mounika-Kajjam/Datasets/master/homeprices.csv"
)
price

Unnamed: 0,town,area,price
0,monroe township,2600,550000
1,monroe township,3000,565000
2,monroe township,3200,610000
3,monroe township,3600,680000
4,monroe township,4000,725000
5,west windsor,2600,585000
6,west windsor,2800,615000
7,west windsor,3300,650000
8,west windsor,3600,710000
9,robinsville,2600,575000


In [0]:
# Count how many rows belong to each town in the categorical column.
price.town.value_counts()

monroe township    5
west windsor       4
robinsville        4
Name: town, dtype: int64

In [0]:
# One-hot encode the town names: one indicator column per distinct town.
# dtype is pinned to uint8 (the default before pandas 2.0) because newer
# pandas would otherwise return True/False columns instead of 0/1.
dummy_set = pd.get_dummies(price["town"], dtype="uint8")
dummy_set

Unnamed: 0,monroe township,robinsville,west windsor
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0
5,0,0,1
6,0,0,1
7,0,0,1
8,0,0,1
9,0,1,0


In [0]:
# Show the original categorical column for comparison with its dummy encoding.
price.town

0     monroe township
1     monroe township
2     monroe township
3     monroe township
4     monroe township
5        west windsor
6        west windsor
7        west windsor
8        west windsor
9         robinsville
10        robinsville
11        robinsville
12        robinsville
Name: town, dtype: object

In [0]:
# Place the indicator columns next to the original frame (column-wise concat,
# aligned on the row index).
merge_data = pd.concat(objs=[price, dummy_set], axis="columns")
merge_data

Unnamed: 0,town,area,price,monroe township,robinsville,west windsor
0,monroe township,2600,550000,1,0,0
1,monroe township,3000,565000,1,0,0
2,monroe township,3200,610000,1,0,0
3,monroe township,3600,680000,1,0,0
4,monroe township,4000,725000,1,0,0
5,west windsor,2600,585000,0,0,1
6,west windsor,2800,615000,0,0,1
7,west windsor,3300,650000,0,0,1
8,west windsor,3600,710000,0,0,1
9,robinsville,2600,575000,0,1,0


In [0]:
# Encode "town" directly on the frame: the original column is replaced by
# town_<name> indicator columns. dtype is pinned to uint8 (the default before
# pandas 2.0) so the indicators stay 0/1 rather than True/False on newer pandas.
pd.get_dummies(price, columns=["town"], dtype="uint8")

Unnamed: 0,area,price,town_monroe township,town_robinsville,town_west windsor
0,2600,550000,1,0,0
1,3000,565000,1,0,0
2,3200,610000,1,0,0
3,3600,680000,1,0,0
4,4000,725000,1,0,0
5,2600,585000,0,0,1
6,2800,615000,0,0,1
7,3300,650000,0,0,1
8,3600,710000,0,0,1
9,2600,575000,0,1,0


In [0]:
# Same encoding, but drop_first=True omits the indicator for the first
# category, which is implied when all remaining indicators are 0.
# dtype is pinned to uint8 (the default before pandas 2.0) so the indicators
# stay 0/1 rather than True/False on newer pandas.
pd.get_dummies(price, columns=["town"], drop_first=True, dtype="uint8")

Unnamed: 0,area,price,town_robinsville,town_west windsor
0,2600,550000,0,0
1,3000,565000,0,0
2,3200,610000,0,0
3,3600,680000,0,0
4,4000,725000,0,0
5,2600,585000,0,1
6,2800,615000,0,1
7,3300,650000,0,1
8,3600,710000,0,1
9,2600,575000,1,0


In [0]:
def product_after_last_seven(values):
    """Return the product of the numbers that follow the last 7 in ``values``.

    The running product restarts at 1 every time a 7 is encountered, so only
    the numbers after the final 7 contribute to the result.

    Args:
        values: list of integers.

    Returns:
        -1 if the list ends with 7; otherwise the product described above.
        An empty list yields 1 (the empty product).
    """
    if not values:
        return 1
    if values[-1] == 7:
        return -1
    product = 1
    for value in values:
        # A 7 discards everything multiplied so far.
        product = 1 if value == 7 else product * value
    return product


# In a notebook ``__name__`` is "__main__", so this still prompts when the cell
# runs; the guard only keeps the helper importable without blocking on stdin.
if __name__ == "__main__":
    # Read four comma-separated lines of integers and print one result per line.
    for _ in range(4):
        numbers = [int(token) for token in input().split(",")]
        print(product_after_last_seven(numbers))

1,4,5
20
1,8,7
-1
3,5,7
-1
2,4,5
40
