In [None]:
# -*- coding: utf-8 -*-
# %% [markdown]
# # Bike SVI Analysis Notebook
# 
# This notebook consolidates the data processing, feature engineering, and analysis for the bike_svi project.

## Imports and Setup

In [None]:
import os
import logging
from pathlib import Path
import pandas as pd
import numpy as np
from dotenv import find_dotenv, load_dotenv

Import functions from bike_svi

In [None]:
from bike_svi.features.build_features import month_difference, main as build_features_main
from bike_svi.data.make_dataset import main as make_dataset_main

## Load Environment Variables

In [None]:
# Locate a .env file with find_dotenv and load it with load_dotenv, so that
# settings such as GSV_API_KEY can be read through os.getenv further down.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

## Set Up Directories and API Key

In [None]:
# Project locations, all relative to the current working directory.
log_path = "logs/logs.txt"
# data_folder must be a Path rather than a plain string: the `/` joins below
# are not defined for `str / str` and would raise TypeError.
data_folder = Path("data")
input_folder = data_folder / "raw/cities"
interim_folder = data_folder / "interim/cities"
output_folder = data_folder / "processed/cities"
# The GSV_API_KEY environment variable must be set to run this notebook;
# os.getenv returns None when it is missing.
gsv_api_key = os.getenv("GSV_API_KEY")

## Load the City List

In [None]:
# Read the city names to process, one per line, with surrounding whitespace
# stripped. Blank lines are skipped so that an empty name never resolves to
# the parent cities folder in the per-city loops below.
# Path(...) makes the join work whether data_folder is a str or a Path.
city_list_path = Path(data_folder) / "external/city_list.txt"
with open(city_list_path, "r") as f:
    city_list = [line.strip() for line in f if line.strip()]

## Make Dataset

In [None]:
# Run the dataset step once per city: raw inputs are read from
# input_folder/<city>, and results go to interim_folder/<city> with a
# "temp" subfolder passed along as the working directory.
for city in city_list:
    print(f"Processing city: {city}")
    dir_input = Path(input_folder) / city
    dir_output = Path(interim_folder) / city
    dir_temp = dir_output / "temp"
    # Make sure both destination folders exist before handing them over.
    for folder in (dir_output, dir_temp):
        folder.mkdir(parents=True, exist_ok=True)
    make_dataset_main(gsv_api_key, log_path, dir_input, dir_temp, dir_output, city)

## Build Features

In [None]:
# Build features for every listed city except "Montreal", which is skipped
# here (the reason is not stated in this notebook — TODO confirm).
# Inputs come from interim_folder/<city>; outputs go to output_folder/<city>.
for city in (name for name in city_list if name != "Montreal"):
    print(f"Building features for city: {city}")
    dir_input = Path(interim_folder) / city
    dir_output = Path(output_folder) / city
    # Create the destination folder up front; it may already exist.
    dir_output.mkdir(parents=True, exist_ok=True)
    build_features_main(dir_input, dir_output)

The data processing and feature engineering steps are now complete. Continue in `bike_svi_analysis.Rmd` for the analysis.