In [1]:
# Import Dependencies
import os
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Connection parameters.
# The values are read from environment variables so that no secret is stored
# in the notebook. Host, database and user fall back to the previous defaults;
# the password has no fallback and must be supplied via WHR_DB_PASSWORD.
# NOTE(review): the password that was previously committed here should be
# rotated, since it remains visible in the notebook's history.
# NOTE(review): when WHR_DB_PASSWORD is unset the value is None; psycopg2 is
# expected to skip None-valued parameters (letting libpq fall back to
# PGPASSWORD / ~/.pgpass) - confirm against the installed psycopg2 version.
param_dic = {
    "host"      : os.environ.get("WHR_DB_HOST", "whr.csnc4l4qvlqd.us-east-2.rds.amazonaws.com"),
    "database"  : os.environ.get("WHR_DB_NAME", "postgres"),
    "user"      : os.environ.get("WHR_DB_USER", "postgres"),
    "password"  : os.environ.get("WHR_DB_PASSWORD"),
}

# Helper that opens a connection to the PostgreSQL server
def connect(param_dic):
    """Open and return a psycopg2 connection built from ``param_dic``.

    Parameters
    ----------
    param_dic : dict
        Keyword arguments forwarded unchanged to ``psycopg2.connect``.

    Returns
    -------
    The connection object returned by ``psycopg2.connect``.

    Notes
    -----
    Any exception raised while connecting is printed and the process is
    asked to stop via ``sys.exit(1)``; the error is not re-raised.
    """
    connection = None
    try:
        print("Connecting to the PostgreSQL database...")
        connection = psycopg2.connect(**param_dic)
    except (psycopg2.DatabaseError, Exception) as exc:
        # Report the failure and abort with a non-zero exit status.
        print(exc)
        sys.exit(1)
    else:
        print("Connection successful.")
        return connection
    

In [None]:
# Function to get the dataframe
def postgresql_to_dataframe(conn, select_query, column_names):
    """Run ``select_query`` on ``conn`` and return the rows as a DataFrame.

    Parameters
    ----------
    conn :
        Object exposing ``cursor()``; the cursor must provide ``execute``,
        ``fetchall`` and ``close``.
    select_query : str
        Query passed unchanged to ``cursor.execute``.
    column_names : list of str
        Labels assigned, in order, to the columns of the fetched rows.

    Returns
    -------
    pandas.DataFrame
        One row per fetched tuple, with ``column_names`` as columns.

    Raises
    ------
    Exception
        Whatever ``execute`` or ``fetchall`` raised. The error is printed
        first and then re-raised. Previously a failed ``execute`` closed the
        cursor and then fell through to ``fetchall`` on the closed cursor,
        which hid the real error behind a secondary one.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(select_query)
        # Get list of tuples
        tuples = cursor.fetchall()
    except Exception as error:
        # psycopg2.DatabaseError is a subclass of Exception, so catching
        # Exception covers the same errors as the original tuple.
        print("Error: %s" % error)
        raise
    finally:
        # Close the cursor exactly once, on success and on failure.
        cursor.close()

    # Create pandas dataframe
    return pd.DataFrame(tuples, columns=column_names)

In [None]:
# Open the database connection used by the query below
conn = connect(param_dic)

# Labels applied, in order, to the columns returned by `select *`.
# NOTE(review): this assumes the table's column order matches the list - verify.
column_names = [
    "country",
    "happinessrank",
    "happinessscore",
    "gdp",
    "family",
    "lifeexpectancy",
    "freedom",
    "generosity",
    "trust",
    "lat",
    "lng",
    "alcohol_liperyear",
]
df = postgresql_to_dataframe(conn, "select * from whr_2019", column_names)
df.head(25)

In [None]:
# Drop the columns that are not used as regression inputs or target
unused_columns = ["country", "happinessrank", "lat", "lng"]
df = df.drop(columns=unused_columns)
# Fill missing entries with NaN
df = df.fillna(value=np.nan)
df.dtypes

In [None]:
# Cast every remaining column to float.
# The previous `df[0:7] = df[0:7].astype(float)` sliced the first seven *rows*
# (plain `[]` slicing on a DataFrame is row-based), so the columns were never
# converted as a whole. Casting the full frame converts all of them;
# errors="raise" keeps the original behaviour of failing on non-numeric data.
df = df.astype(float, errors="raise")
df.head()

## Predicting Happiness from GDP

In [None]:
# GDP
X = df.gdp
y = df.happinessscore
# Add the bias (intercept) column of ones. Its length is taken from the data
# instead of a hard-coded row count, so the cell keeps working if the number
# of rows in the table changes.
X_b = np.c_[np.ones((len(X), 1)), X]

# Dividing the data into train and test sets (fixed random_state for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X_b, y, test_size=0.2, random_state=42)

# Normal equation: theta = (X^T X)^-1 X^T y
theta_optimize = np.linalg.inv(X_train.T.dot(X_train)).dot(X_train.T).dot(y_train)

In [None]:
# Apply the fitted weights to the held-out rows and report R^2 on the test split
y_pred = np.dot(X_test, theta_optimize)
r2_score(y_test, y_pred)

In [None]:
# Plot: scatter of the raw points with a degree-1 least-squares line on top
x = np.array(df["gdp"])
y = np.array(df["happinessscore"])
plt.plot(x, y, "o")
plt.xlabel("GDP")
plt.ylabel("Happiness")
# Slope and intercept of the fitted line
m, b = np.polyfit(x, y, deg=1)
plt.plot(x, m * x + b)



## Predicting Happiness from Family

In [None]:
# Family
X = df.family
y = df.happinessscore
# Add the bias (intercept) column of ones. Its length is taken from the data;
# the previous hard-coded 777 did not agree with the 154 used for the GDP fit
# on the same frame, and np.c_ needs both parts to have the same row count.
X_b = np.c_[np.ones((len(X), 1)), X]

# Dividing the data into train and test sets (fixed random_state for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X_b, y, test_size=0.2, random_state=42)

# Normal equation: theta = (X^T X)^-1 X^T y
theta_optimize = np.linalg.inv(X_train.T.dot(X_train)).dot(X_train.T).dot(y_train)

In [None]:
# Apply the fitted weights to the held-out rows and report R^2 on the test split
y_pred = np.dot(X_test, theta_optimize)
r2_score(y_test, y_pred)

In [None]:
# Plot: scatter of the raw points with a degree-1 least-squares line on top
x = np.array(df["family"])
y = np.array(df["happinessscore"])
plt.plot(x, y, "o")
plt.xlabel("Family")
plt.ylabel("Happiness")
# Slope and intercept of the fitted line
m, b = np.polyfit(x, y, deg=1)
plt.plot(x, m * x + b)

## Predicting Happiness from Life Expectancy

In [None]:
# Life Expectancy
X = df.lifeexpectancy
y = df.happinessscore
# Add the bias (intercept) column of ones. Its length is taken from the data;
# the previous hard-coded 777 did not agree with the 154 used for the GDP fit
# on the same frame, and np.c_ needs both parts to have the same row count.
X_b = np.c_[np.ones((len(X), 1)), X]

# Dividing the data into train and test sets (fixed random_state for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X_b, y, test_size=0.2, random_state=42)

# Normal equation: theta = (X^T X)^-1 X^T y
theta_optimize = np.linalg.inv(X_train.T.dot(X_train)).dot(X_train.T).dot(y_train)

In [None]:
# Apply the fitted weights to the held-out rows and report R^2 on the test split
y_pred = np.dot(X_test, theta_optimize)
r2_score(y_test, y_pred)

In [None]:
# Plot: scatter of the raw points with a degree-1 least-squares line on top
x = np.array(df["lifeexpectancy"])
y = np.array(df["happinessscore"])
plt.plot(x, y, "o")
plt.xlabel("Life Expectancy")
plt.ylabel("Happiness")
# Slope and intercept of the fitted line
m, b = np.polyfit(x, y, deg=1)
plt.plot(x, m * x + b)

## Predicting Happiness from Freedom

In [None]:
# Freedom
X = df.freedom
y = df.happinessscore
# Add the bias (intercept) column of ones. Its length is taken from the data;
# the previous hard-coded 777 did not agree with the 154 used for the GDP fit
# on the same frame, and np.c_ needs both parts to have the same row count.
X_b = np.c_[np.ones((len(X), 1)), X]

# Dividing the data into train and test sets (fixed random_state for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X_b, y, test_size=0.2, random_state=42)

# Normal equation: theta = (X^T X)^-1 X^T y
theta_optimize = np.linalg.inv(X_train.T.dot(X_train)).dot(X_train.T).dot(y_train)

In [None]:
# Apply the fitted weights to the held-out rows and report R^2 on the test split
y_pred = np.dot(X_test, theta_optimize)
r2_score(y_test, y_pred)

In [None]:
# Plot: scatter of the raw points with a degree-1 least-squares line on top
x = np.array(df["freedom"])
y = np.array(df["happinessscore"])
plt.plot(x, y, "o")
plt.xlabel("Freedom")
plt.ylabel("Happiness")
# Slope and intercept of the fitted line
m, b = np.polyfit(x, y, deg=1)
plt.plot(x, m * x + b)

## Predicting Happiness from Trust

In [None]:
# Trust
X = df.trust
y = df.happinessscore
# Add the bias (intercept) column of ones. Its length is taken from the data;
# the previous hard-coded 777 did not agree with the 154 used for the GDP fit
# on the same frame, and np.c_ needs both parts to have the same row count.
X_b = np.c_[np.ones((len(X), 1)), X]

# Dividing the data into train and test sets (fixed random_state for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X_b, y, test_size=0.2, random_state=42)

# Normal equation: theta = (X^T X)^-1 X^T y
theta_optimize = np.linalg.inv(X_train.T.dot(X_train)).dot(X_train.T).dot(y_train)

In [None]:
# Apply the fitted weights to the held-out rows and report R^2 on the test split
y_pred = np.dot(X_test, theta_optimize)
r2_score(y_test, y_pred)

In [None]:
# Plot: scatter of the raw points with a degree-1 least-squares line on top
x = np.array(df["trust"])
y = np.array(df["happinessscore"])
plt.plot(x, y, "o")
plt.xlabel("Trust")
plt.ylabel("Happiness")
# Slope and intercept of the fitted line
m, b = np.polyfit(x, y, deg=1)
plt.plot(x, m * x + b)

## Predicting Happiness from Generosity

In [None]:
# Generosity
X = df.generosity
y = df.happinessscore
# Add the bias (intercept) column of ones. Its length is taken from the data;
# the previous hard-coded 777 did not agree with the 154 used for the GDP fit
# on the same frame, and np.c_ needs both parts to have the same row count.
X_b = np.c_[np.ones((len(X), 1)), X]

# Dividing the data into train and test sets (fixed random_state for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X_b, y, test_size=0.2, random_state=42)

# Normal equation: theta = (X^T X)^-1 X^T y
theta_optimize = np.linalg.inv(X_train.T.dot(X_train)).dot(X_train.T).dot(y_train)

In [None]:
# Apply the fitted weights to the held-out rows and report R^2 on the test split
y_pred = np.dot(X_test, theta_optimize)
r2_score(y_test, y_pred)

In [None]:
# Plot: scatter of the raw points with a degree-1 least-squares line on top
x = np.array(df["generosity"])
y = np.array(df["happinessscore"])
plt.plot(x, y, "o")
plt.xlabel("Generosity")
plt.ylabel("Happiness")
# Slope and intercept of the fitted line
m, b = np.polyfit(x, y, deg=1)
plt.plot(x, m * x + b)

## Predicting Happiness from Alcohol Consumption

In [None]:
# Alcohol Consumption
X = df.alcohol_liperyear
y = df.happinessscore
# Add the bias (intercept) column of ones. Its length is taken from the data;
# the previous hard-coded 777 did not agree with the 154 used for the GDP fit
# on the same frame, and np.c_ needs both parts to have the same row count.
X_b = np.c_[np.ones((len(X), 1)), X]

# Dividing the data into train and test sets (fixed random_state for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X_b, y, test_size=0.2, random_state=42)

# Normal equation: theta = (X^T X)^-1 X^T y
theta_optimize = np.linalg.inv(X_train.T.dot(X_train)).dot(X_train.T).dot(y_train)

In [None]:
# Apply the fitted weights to the held-out rows and report R^2 on the test split
y_pred = np.dot(X_test, theta_optimize)
r2_score(y_test, y_pred)

In [None]:
# Plot: scatter of the raw points with a degree-1 least-squares line on top
x = np.array(df["alcohol_liperyear"])
y = np.array(df["happinessscore"])
plt.plot(x, y, "o")
plt.xlabel("Alcohol Consumption")
plt.ylabel("Happiness")
# Slope and intercept of the fitted line
m, b = np.polyfit(x, y, deg=1)
plt.plot(x, m * x + b)

In [None]:
# Close the database connection that was opened with connect(param_dic);
# nothing after this point in the notebook queries the database.
conn.close()