## Multiple-Variable Linear Regression Analysis of Children's vs. Parents' Heights

In [1]:
import numpy as np
import pandas as pd
import tkinter as tk 

from sklearn.linear_model import LinearRegression

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

%matplotlib inline
 
# Web address kept alongside the imports. NOTE(review): no visible cell in this
# notebook references `url`; the data is read from the local file 'family.txt'
# instead. Presumably this is where the dataset came from -- confirm.
url = "http://www.randomservices.org/random/data"

In [2]:
# Read the tab-separated family height table from the working directory,
# then show it as the cell's output.
df = pd.read_csv('family.txt', sep='\t')
df

Unnamed: 0,Family,Dad,Mom,Gender,Height_of_a_Child,Number_of_Kids
0,1,78.5,67.0,M,73.2,4
1,1,78.5,67.0,F,69.2,4
2,1,78.5,67.0,F,69.0,4
3,1,78.5,67.0,F,69.0,4
4,2,75.5,66.5,M,73.5,4
...,...,...,...,...,...,...
893,136A,68.5,65.0,M,68.5,8
894,136A,68.5,65.0,M,67.7,8
895,136A,68.5,65.0,F,64.0,8
896,136A,68.5,65.0,F,63.5,8


In [3]:
# Print three quick views of the frame, each preceded by a blank line:
#   1. basic numerical statistics of each numeric column
#   2. the data type of each column
#   3. the column headers together with the first 5 records
for summary in (df.describe(), df.dtypes, df.head()):
    print("\n", summary)


               Dad         Mom  Height_of_a_Child  Number_of_Kids
count  898.000000  898.000000         898.000000      898.000000
mean    69.232851   64.084410          66.760690        6.135857
std      2.470256    2.307025           3.582918        2.685156
min     62.000000   58.000000          56.000000        1.000000
25%     68.000000   63.000000          64.000000        4.000000
50%     69.000000   64.000000          66.500000        6.000000
75%     71.000000   65.500000          69.700000        8.000000
max     78.500000   70.500000          79.000000       15.000000

 Family                object
Dad                  float64
Mom                  float64
Gender                object
Height_of_a_Child    float64
Number_of_Kids         int64
dtype: object

   Family   Dad   Mom Gender  Height_of_a_Child  Number_of_Kids
0      1  78.5  67.0      M               73.2               4
1      1  78.5  67.0      F               69.2               4
2      1  78.5  67.0      F     

In [4]:
# Split the table into model inputs and the value to be predicted.

# Regressors: both parents' heights (two-column DataFrame).
b = df.loc[:, ['Dad', 'Mom']]
# Response: the child's height (Series).
c = df.loc[:, 'Height_of_a_Child']

In [5]:
# Fit a linear regression (with intercept) that explains the child's height
# from the Dad's and Mom's heights, then report the fitted parameters.

Bmodel = LinearRegression(fit_intercept=True).fit(b, c)

# `end='\n\n'` leaves one empty line between the two reports.
print('Coefficient: \n', Bmodel.coef_, end='\n\n')
print('Intercept: \n', Bmodel.intercept_)

Coefficient: 
 [0.37989697 0.28321451]

Intercept: 
 22.309705489589717


In [6]:
# Predict a child's height from one (Dad, Mom) pair using the fitted model.
#
# The model was fitted on a DataFrame with the columns 'Dad' and 'Mom', so the
# new observation is passed as a one-row DataFrame with the same column names
# and order. Passing a bare nested list makes scikit-learn warn that the input
# "does not have valid feature names" and silently relies on positional order.
new_parents = pd.DataFrame([[75, 60]], columns=['Dad', 'Mom'])
cpred = Bmodel.predict(new_parents)
print("Prediction for Child's height using both Dad's height and Mom's height",cpred.round(2))

Prediction for Child's height using both Dad's height and Mom's height [67.79]


In [8]:
# GUI to input values for multiple regression variables

root = tk.Tk()

# Canvas that hosts every label, entry box and button of the input form.
canvas1 = tk.Canvas(root, width=500, height=300)
canvas1.pack()

# Fitted model parameters (from scikit-learn), shown at the bottom of the form.
# The label text must be a real string: handing Tk a Python tuple makes it
# render the value as a Tcl list, which shows stray braces around any element
# that contains a space (e.g. "{Intercept: } 22.3...").
Intercept_result = f"Intercept: {Bmodel.intercept_}"
label_Intercept = tk.Label(root, text=Intercept_result, justify='center')
canvas1.create_window(260, 220, window=label_Intercept)

Coefficients_result = f"Coefficients: {Bmodel.coef_}"
label_Coefficients = tk.Label(root, text=Coefficients_result, justify='center')
canvas1.create_window(260, 240, window=label_Coefficients)

# Dad's height: caption and entry box (1st input)
label1 = tk.Label(root, text="   Type Dad's Height: ")
canvas1.create_window(110, 100, window=label1)

entry1 = tk.Entry(root)
canvas1.create_window(270, 100, window=entry1)

# Mom's height: caption and entry box (2nd input)
label2 = tk.Label(root, text="Type Mom's Height: ")
canvas1.create_window(120, 120, window=label2)

entry2 = tk.Entry(root)
canvas1.create_window(270, 120, window=entry2)

def values():
    """Button callback: read both parents' heights and show the prediction.

    Reads the text of ``entry1`` (Dad) and ``entry2`` (Mom), converts both to
    floats and asks ``Bmodel`` for the predicted child's height. The outcome
    (or a hint when the input is not a finite number) is written to a single
    orange label on ``canvas1``.

    Side effects:
        On valid input, updates the module-level ``New_Dad_Height`` and
        ``New_Mom_Height`` (kept so code inspecting them keeps working).
        Invalid input leaves them untouched.
    """
    global New_Dad_Height  # our 1st input variable
    global New_Mom_Height  # our 2nd input variable

    try:
        dad_height = float(entry1.get())
        mom_height = float(entry2.get())
        # float() accepts 'nan' and 'inf'; treat them as invalid input too.
        if not np.isfinite([dad_height, mom_height]).all():
            raise ValueError("heights must be finite numbers")
    except ValueError:
        # Empty or non-numeric entry: tell the user instead of letting the
        # exception escape into the Tk event loop.
        message = "Please enter a numeric height for both parents."
    else:
        New_Dad_Height = dad_height
        New_Mom_Height = mom_height
        # Same column names/order as the training data, so scikit-learn can
        # match features by name and does not emit a feature-name warning.
        parents = pd.DataFrame([[dad_height, mom_height]], columns=['Dad', 'Mom'])
        predicted_height = Bmodel.predict(parents)[0]
        # A plain string (not a tuple) so Tk does not render Tcl-list braces.
        message = f"Predicted Child's Height: {predicted_height:.2f}"

    # Create the result label on first use and reuse it afterwards; making a
    # new label per click would stack widgets on top of each other.
    result_label = getattr(values, "_result_label", None)
    if result_label is None:
        result_label = tk.Label(root, bg='orange')
        canvas1.create_window(260, 280, window=result_label)
        values._result_label = result_label
    result_label.config(text=message)
    
# Orange button that runs the `values` callback defined above when clicked.
button1 = tk.Button(
    root,
    text="Predict A Child's Height",
    command=values,
    bg='orange',
)
canvas1.create_window(270, 150, window=button1)
 
def _embed_height_scatter(parent_column, color, xlabel, title):
    """Embed a scatter plot of one parent's height vs. the child's height.

    Parameters:
        parent_column: name of the column in ``df`` plotted on the x-axis.
        color: marker colour passed to ``Axes.scatter``.
        xlabel: x-axis label.
        title: plot title.

    Returns:
        ``(figure, axes, canvas)`` for the embedded plot, where ``canvas`` is
        the ``FigureCanvasTkAgg`` packed on the right side of ``root``.
    """
    figure = plt.Figure(figsize=(5, 4), dpi=100)
    ax = figure.add_subplot(111)
    ax.scatter(df[parent_column], df['Height_of_a_Child'],
               color=color, label='Height_of_a_Child')
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Child's Height")
    ax.set_title(title)
    ax.legend()

    canvas = FigureCanvasTkAgg(figure, root)
    canvas.get_tk_widget().pack(side=tk.RIGHT, fill=tk.BOTH)
    # Render after the axes are fully decorated.
    canvas.draw()
    return figure, ax, canvas


# Dad's Height Vs. Child's Height
figure3, ax3, scatter3 = _embed_height_scatter(
    'Dad', 'aqua', "Dad's Height", "Dad's Height Vs. Child's Height")

# Mom's Height Vs. Child's Height
figure4, ax4, scatter4 = _embed_height_scatter(
    'Mom', 'pink', "Mom's Height", "Mom's Height Vs. Child's Height")

# Hand control to Tk; this call blocks until the window is closed.
root.mainloop()