# Step1: Load Data

In [2]:
import pandas as pd
# Load the dataset from a CSV in the working directory; later cells rely on
# its Gender, Height and Weight columns.
data = pd.read_csv('weight-height.csv')

In [3]:
# Sanity check: dimensions of the loaded frame as (rows, columns).
data.shape

(10000, 3)

In [4]:
# Preview the first 10 rows to inspect the columns and some sample values.
data.head(10)

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801
5,Male,67.253016,152.212156
6,Male,68.785081,183.927889
7,Male,68.348516,167.971111
8,Male,67.01895,175.92944
9,Male,63.456494,156.399676


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,Height,Weight
count,10000.0,10000.0
mean,66.36756,161.440357
std,3.847528,32.108439
min,54.263133,64.700127
25%,63.50562,135.818051
50%,66.31807,161.212928
75%,69.174262,187.169525
max,78.998742,269.989698


In [7]:
# Encode Gender as 1 for "Male" and 0 for anything else.
# Matching both the raw label "Male" and the already-encoded value 1 makes this
# cell idempotent: the previous list comprehension compared the encoded ints
# against "Male" on a second run and silently turned every row into 0.
data["Gender"] = data["Gender"].isin(["Male", 1]).astype("int64")
data.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


# Step2: Separate input data and output data

In [8]:
# Features: encoded gender and height. Both selections use a list of column
# labels so that X and y are two-dimensional DataFrames.
X = data.loc[:, ['Gender', 'Height']]
# Target: weight, kept as a single-column DataFrame (not a Series).
y = data.loc[:, ['Weight']]

# Step3: Split Data into Training data and Testing Data

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore every downstream number) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.40,
    random_state=0,
)

# Step4: Model Selection

In [10]:
from sklearn.linear_model import LinearRegression
# Unfitted linear regression estimator created with its default arguments.
model = LinearRegression()

# Step5: Model Training

In [11]:
# Fit the regression on the training split only; the test split is not used here.
model.fit(X_train,y_train)

# Step6: Prediction on Training Data

In [12]:
# Predictions for the same rows the model was fitted on.
y_pred_train = model.predict(X_train)

In [13]:
# Flatten the single-column target to 1-D. ravel() infers the length, so the
# cell keeps working if the dataset size or test_size changes (the previous
# reshape(6000,) hard-coded the number of training rows).
y_train.values.ravel()

array([141.8754655, 100.743008 , 186.2707727, ..., 162.4759573,
       136.7830224, 188.4506741], shape=(6000,))

In [14]:
# Flatten the predictions to 1-D without hard-coding the number of training
# rows (previously reshape(6000,)), so the cell survives a different split size.
y_pred_train.ravel()

array([140.48528569, 107.6858303 , 167.11435478, ..., 177.74373227,
       126.38895867, 196.38490546], shape=(6000,))

In [15]:
# Side-by-side view of actual vs. predicted weights on the training split.
# ravel() flattens both arrays to 1-D without hard-coding the row count
# (previously 6000), so the cell still works if the split size changes.
pd.DataFrame({"y_train": y_train.values.ravel(), "y_pred": y_pred_train.ravel()})

Unnamed: 0,y_train,y_pred
0,141.875465,140.485286
1,100.743008,107.685830
2,186.270773,167.114355
3,124.460003,131.198641
4,130.012336,126.931643
...,...,...
5995,97.263881,116.227086
5996,174.156893,173.358317
5997,162.475957,177.743732
5998,136.783022,126.388959


# Step7: Prediction on Test Data

In [16]:
# Predictions for the held-out test rows, which were not passed to model.fit.
y_pred_test = model.predict(X_test)

In [17]:
# Side-by-side view of actual vs. predicted weights on the test split.
# ravel() flattens both arrays to 1-D without hard-coding the row count
# (previously 4000), so the cell still works if the split size changes.
pd.DataFrame({"y_test": y_test.values.ravel(), "y_pred": y_pred_test.ravel()})

Unnamed: 0,y_test,y_pred
0,138.085796,141.852482
1,187.363366,176.582863
2,216.533191,219.755008
3,131.761443,148.738162
4,157.718438,162.134960
...,...,...
3995,192.553956,193.544039
3996,152.999492,143.205847
3997,102.098733,114.823234
3998,176.948433,161.538627


# Step8: Evaluating the Model (R² Score)

In [18]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [19]:
# R² on the training split (how well the fit explains the data it was trained on)
r2_score(y_true=y_train, y_pred=y_pred_train)

0.9051111369071825

In [20]:
# R² on the held-out test split (compare with the training score above it
# in the notebook to spot over-fitting)
r2_score(y_true=y_test, y_pred=y_pred_test)

0.8991766120141077

# Step9: Wrapping the Model in a Prediction Function

In [21]:
def predict_weight(info):
    """Predict weight with the notebook's fitted global ``model`` and rescale it.

    Parameters
    ----------
    info : array-like
        Feature rows forwarded unchanged to ``model.predict``. The example
        call in this notebook passes ``[[gender, height_in_feet * 12]]``.

    Returns
    -------
    The result of ``model.predict(info)`` multiplied by 0.453592
    (presumably a pounds-to-kilograms conversion; confirm the dataset's units).
    """
    raw_prediction = model.predict(info)
    scale = 0.453592  # presumably kg per lb -- TODO confirm dataset units
    return raw_prediction * scale

In [22]:
height = float(input("Enter you height in feet"))
gender = float(input("If you are a Male, type '1', if not type '0'"))
# Build the query with the same column names and order the model was trained
# on (Gender, Height) so scikit-learn does not warn about missing feature
# names. Height is entered in feet and multiplied by 12 before prediction.
query = pd.DataFrame([[gender, height * 12]], columns=["Gender", "Height"])
# predict_weight returns a one-element array; .item() extracts the scalar so
# the message shows a number instead of a nested array like [[84.76428179]].
predicted_weight = predict_weight(query).item()
print(f"Your predicted weight is {predicted_weight:.2f} kgs")

Enter you height in feet 5.75
If you are a Male, type '1', if not type '0' 1


Your predicted weight is [[84.76428179]] kgs


