In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Load the spreadsheet into a DataFrame and use the
# 'Current residence in' column as the row index.

combined_data = (
    pd.read_excel('2010-2019combineddata.xls')
    .set_index('Current residence in')
)
combined_data.head()

In [None]:
# Build the feature (X) and target (y) arrays.
# scikit-learn expects two-dimensional input, so each column is
# reshaped into a single-column array of shape (n_rows, 1).

X, y = (
    combined_data[column].to_numpy().reshape(-1, 1)
    for column in ("Avg. Home Price", "Net Migration")
)

print("Shape: ", X.shape, y.shape)

In [None]:
# Eyeball the raw relationship first: a roughly straight cloud of
# points would suggest a linear trend.

plt.scatter(x=X, y=y)
plt.gca().set_xlabel("Avg. Home Price")
plt.gca().set_ylabel("Net Migration")

In [None]:
from sklearn.model_selection import train_test_split

# No test_size is given, so the library's default split proportion is used;
# the fixed random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LinearRegression

# Instantiate an unfitted linear regression estimator with default settings.

model = LinearRegression()

In [None]:
# Train on the training split only; the test split stays unseen for scoring later.
model.fit(X=X_train, y=y_train)

In [None]:
# Report the fitted line's slope(s) and intercept.
for caption, fitted_value in (
    ('Weight coefficients: ', model.coef_),
    ('y-axis intercept: ', model.intercept_),
):
    print(caption, fitted_value)

In [None]:
# `model.predict()` wants 2-D input, so the smallest and largest X values
# are each wrapped in a 1x1 array.

x_min, x_max = (np.array([[bound]]) for bound in (X.min(), X.max()))
print(f"Min X Value: {x_min}")
print(f"Max X Value: {x_max}")

In [None]:
# Predict the target at both ends of the X range; these two points
# define the fitted line drawn in the next plot.

y_min, y_max = map(model.predict, (x_min, x_max))

In [None]:
# Overlay the fitted regression line on the raw observations.
# The line is drawn between the predictions at the smallest and largest X,
# which is enough to define a straight line.

plt.scatter(X, y, c='blue')
# x_min/x_max and y_min/y_max are tiny 2-D arrays; flatten them to plain
# 1-D endpoint coordinates so plt.plot draws a single two-point line.
plt.plot(
    np.ravel([x_min, x_max]),
    np.ravel([y_min, y_max]),
    c='red',
)
# The label is spelled exactly like the source column ("Avg. Home Price")
# so both figures of this section carry the same axis name.
plt.xlabel("Avg. Home Price")
plt.ylabel("Net Migration")

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict on the held-out split, then score those predictions
# against the true test targets with MSE and R2.
predicted = model.predict(X_test)

mse, r2 = (
    metric(y_test, predicted)
    for metric in (mean_squared_error, r2_score)
)

print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R2 ): {r2}")

In [None]:
# Built-in scorer evaluated on the held-out split (for this estimator it reports R2).
model.score(X=X_test, y=y_test)

COMPARING MEDIAN INCOME AND NET MIGRATION

In [None]:
# Feature: median income. Target: net migration.
# scikit-learn needs 2-D arrays, so a trailing axis is added to each
# 1-D column, giving shape (n_rows, 1).

X = combined_data["Median Income"].to_numpy()[:, np.newaxis]
y = combined_data["Net Migration"].to_numpy()[:, np.newaxis]

print("Shape: ", X.shape, y.shape)

In [None]:
# Visual sanity check for a linear relationship before modelling.
plt.scatter(x=X, y=y)
plt.gca().set_xlabel("Median Income")
plt.gca().set_ylabel("Net Migration")

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Reproducible train/test split (fixed seed, default proportions).
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    random_state=42,
)

# Start from a fresh estimator so nothing carries over from the
# previous section, then fit it on the training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Report the fitted parameters.
for caption, fitted_value in (
    ('Weight coefficients: ', model.coef_),
    ('y-axis intercept: ', model.intercept_),
):
    print(caption, fitted_value)

# `model.predict()` requires 2-D input, so each extreme X value is
# wrapped in a 1x1 array before predicting the line's two endpoints.
x_min, x_max = (np.array([[extreme]]) for extreme in (X.min(), X.max()))
y_min, y_max = map(model.predict, (x_min, x_max))

In [None]:
# Observations in blue, fitted line in red.
# The line is defined by its two endpoints: (x_min, y_min) and (x_max, y_max).

plt.scatter(X, y, c='blue')
plt.plot(
    np.vstack((x_min[0], x_max[0])),
    np.vstack((y_min[0], y_max[0])),
    c='red',
)
plt.gca().set_xlabel("Median Income")
plt.gca().set_ylabel("Net Migration")

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Predictions for the held-out rows only.
predicted = model.predict(X_test)

# Compare predictions with the true test targets.
mse = mean_squared_error(y_true=y_test, y_pred=predicted)
r2 = r2_score(y_true=y_test, y_pred=predicted)

for report_line in (
    f"Mean Squared Error (MSE): {mse}",
    f"R-squared (R2 ): {r2}",
):
    print(report_line)
model.score(X_test, y_test)

UNEMPLOYMENT RATE AND NET MIGRATION

In [None]:
# Feature: unemployment rate. Target: net migration.
# Each column becomes an (n_rows, 1) array because scikit-learn
# only accepts two-dimensional input.

X, y = (
    combined_data[column].to_numpy().reshape(-1, 1)
    for column in ("Unemployment Rate", "Net Migration")
)

print("Shape: ", X.shape, y.shape)

In [None]:
# Look for a linear pattern in the raw points before fitting.
plt.scatter(x=X, y=y)
plt.gca().set_xlabel("Unemployment Rate")
plt.gca().set_ylabel("Net Migration")

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Same seed as the other sections, so each split is made the same way.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

# New estimator for this feature/target pair, trained on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Show the learned slope(s) and intercept.
print(' '.join(['Weight coefficients: ', str(model.coef_)]))
print(' '.join(['y-axis intercept: ', str(model.intercept_)]))

# X is 2-D, so reducing with keepdims=True yields the 1x1 arrays
# that `model.predict()` needs.
x_min = X.min(keepdims=True)
x_max = X.max(keepdims=True)

# Fitted values at both ends of the X range (the line's endpoints).
y_min, y_max = (model.predict(endpoint) for endpoint in (x_min, x_max))

In [None]:
# Scatter the observations, then draw the fitted line through
# its two endpoints (x_min, y_min) and (x_max, y_max).

plt.scatter(X, y, c='blue')
plt.plot(
    np.vstack((x_min[0], x_max[0])),
    np.vstack((y_min[0], y_max[0])),
    c='red',
)
plt.gca().set_xlabel("Unemployment Rate")
plt.gca().set_ylabel("Net Migration")

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Run the fitted model over the held-out features.
predicted = model.predict(X_test)

# Error and goodness-of-fit on the test split.
mse, r2 = (
    metric(y_test, predicted)
    for metric in (mean_squared_error, r2_score)
)

print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R2 ): {r2}")
model.score(X=X_test, y=y_test)

UNEMPLOYMENT RATE AND MEDIAN INCOME

In [None]:
# Feature: unemployment rate. Target: median income.
# scikit-learn requires 2-D arrays, hence the reshape to (n_rows, 1).

X = np.reshape(combined_data["Unemployment Rate"].to_numpy(), (-1, 1))
y = np.reshape(combined_data["Median Income"].to_numpy(), (-1, 1))

print("Shape: ", X.shape, y.shape)

In [None]:
# Check visually whether the two variables move together linearly.
plt.scatter(x=X, y=y)
plt.gca().set_xlabel("Unemployment Rate")
plt.gca().set_ylabel("Median Income")

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Seeded split so that re-running the cell gives the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)

# Build a new regressor and train it on the training partition.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Fitted parameters of the regression line.
for caption, fitted_value in (
    ('Weight coefficients: ', model.coef_),
    ('y-axis intercept: ', model.intercept_),
):
    print(caption, fitted_value)

# Promote the scalar extremes of X to 1x1 arrays, the 2-D form
# that `model.predict()` requires.
x_min = np.atleast_2d(X.min())
x_max = np.atleast_2d(X.max())

# Predicted target at each extreme; together they define the fit line.
y_min, y_max = map(model.predict, (x_min, x_max))

In [None]:
# Data points in blue with the fitted line in red, drawn from
# (x_min, y_min) to (x_max, y_max).

plt.scatter(X, y, c='blue')
plt.plot(
    np.vstack((x_min[0], x_max[0])),
    np.vstack((y_min[0], y_max[0])),
    c='red',
)
plt.gca().set_xlabel("Unemployment Rate")
plt.gca().set_ylabel("Median Income")

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict the held-out targets from the held-out features.
predicted = model.predict(X_test)

# Score those predictions against the true test values.
mse = mean_squared_error(y_true=y_test, y_pred=predicted)
r2 = r2_score(y_true=y_test, y_pred=predicted)

for report_line in (
    f"Mean Squared Error (MSE): {mse}",
    f"R-squared (R2 ): {r2}",
):
    print(report_line)
model.score(X_test, y_test)