Build a regression model.

Provide model output and an interpretation of the results. 

In [1]:
import requests
import pandas as pd
import statsmodels.api as sm

url = "http://api.citybik.es/v2/networks"
response = requests.get(url)

if response.status_code == 200:
    response_data = response.json()
    networks = response_data.get('networks', [])
    vancouver_network = None
    for network in networks:
        if network['location']['city'].lower() == 'vancouver':
            vancouver_network = network
            break

    if vancouver_network:
        network_id = vancouver_network['id']
        details_url = f"http://api.citybik.es/v2/networks/{network_id}"
        details_response = requests.get(details_url)
        
        if details_response.status_code == 200:
            vancouver_details = details_response.json()
            stations_in_vancouver = vancouver_details.get('network', {}).get('stations', [])
        else:
            print(f"Failed to retrieve details for Vancouver network: {details_response.status_code}")
    else:
        print("Vancouver network not found.")
else:
    print(f"Failed to retrieve data: {response.status_code}")

foursquare_headers = {
    "accept": "application/json",
    "Authorization": "fsq3XRUAarAkYkZaUxo7U14qSYIS91CYwbt1bF94/FymeqU="
}
foursquare_url = "https://api.foursquare.com/v3/places/search"

station_data = []

for station in stations_in_vancouver:
    latitude = station['latitude']
    longitude = station['longitude']
    params = {
        'll': f'{latitude},{longitude}',
        'radius': 1000,
        'categories': '13065,13027'
    }
    response = requests.get(foursquare_url, headers=foursquare_headers, params=params)
    
    if response.status_code == 200:
        data = response.json()
        places = data.get('results', [])
        num_places = len(places)
        station_data.append({
            'Latitude': latitude,
            'Longitude': longitude,
            'Number of Restaurants and Bars': num_places
        })
    else:
        print(f"Failed to retrieve data from Foursquare API: {response.status_code}")

df = pd.DataFrame(station_data)

df['Number of Bike Stations'] = [1] * len(df)

X = df[['Number of Restaurants and Bars']]
X = sm.add_constant(X)
y = df['Number of Bike Stations']

ols_model = sm.OLS(y, X).fit()

print(ols_model.summary())

df['Predicted Number of Bike Stations'] = ols_model.predict(X)

most_suitable_location = df.loc[df['Predicted Number of Bike Stations'].idxmax()]

print("Most Suitable Location:")
print(most_suitable_location)

                               OLS Regression Results                              
Dep. Variable:     Number of Bike Stations   R-squared:                        -inf
Model:                                 OLS   Adj. R-squared:                   -inf
Method:                      Least Squares   F-statistic:                    -254.0
Date:                     Sat, 31 Aug 2024   Prob (F-statistic):               1.00
Time:                             20:42:26   Log-Likelihood:                 8111.5
No. Observations:                      256   AIC:                        -1.622e+04
Df Residuals:                          254   BIC:                        -1.621e+04
Df Model:                                1                                         
Covariance Type:                 nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------

  return 1 - self.ssr/self.centered_tss


# Stretch

How can you turn the regression model into a classification model?

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import Binarizer

file_path = 'C:/Users/kasun/Desktop/Data Analytics/Project 02/practice test/Joining API Question/citybike_created_df.csv'
df_vancouver_data = pd.read_csv(file_path)

file_path = 'C:/Users/kasun/Desktop/Data Analytics/Project 02/practice test/Joining API Question/foursquare_created_df.csv'
df_dining_and_drinking = pd.read_csv(file_path)

df_vancouver_data['Type'] = 'Bike Station'
df_dining_and_drinking['Type'] = 'Restaurant/Bar'
combined_df = pd.concat([df_vancouver_data, df_dining_and_drinking])



# Prepare features and target variable
combined_df = combined_df.dropna(subset=['Latitude', 'Longitude'])
X = combined_df[['Longitude']]
y = combined_df['Latitude']

bins = [-float('inf'), 49.25, 49.30, float('inf')]
labels = ['Low', 'Medium', 'High']
y_binned = pd.cut(y, bins=bins, labels=labels)

scaler = Binarizer()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_binned, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 0  1]
 [ 0 53]]
              precision    recall  f1-score   support

         Low       0.00      0.00      0.00         1
      Medium       0.98      1.00      0.99        53

    accuracy                           0.98        54
   macro avg       0.49      0.50      0.50        54
weighted avg       0.96      0.98      0.97        54



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
