# Data Processing

This notebook downloads the raw data and processes it. The result is a dataset ready for further analysis and modeling.

# Table of Contents

[1. Imports & Environment Configuration](#Imports-&-Environment-Configuration)

# Imports & Environment Configuration

In [1]:
%load_ext autoreload

import json
import matplotlib.pyplot as plt
import os
import pandas

if os.path.basename(os.getcwd()) == 'notebooks':
    # Make it possible to import modules
    import sys
    sys.path.append("../") 

from src.data.blob_downloader import BlobDownloader

In [2]:
# Render matplotlib figures inline, as part of the notebook output
%matplotlib inline

# Ask the inline backend for 'retina' figures, i.e. higher-resolution plots
%config InlineBackend.figure_format = 'retina'

## Environment Configuration Variables

In [3]:
# Locate the project root. Jupyter Notebook starts the kernel inside the
# 'notebooks' folder, while VS Code may start it at the project root, so the
# working directory is inspected and, if needed, its parent is used instead.
current_dir = os.getcwd()
project_root_folder = (
    os.path.abspath(os.path.join(current_dir, '..'))
    if os.path.basename(current_dir) == 'notebooks'
    else current_dir
)

# Folder that holds the raw, unprocessed data files
data_raw_folder = os.path.join(project_root_folder, 'data', 'raw')

# Load data

In [4]:
# Connection details for the Azure Storage container that holds the data.
# Fill in the placeholders for a manual run; never commit real keys.
account_name='<storage_account_name>'
account_key='<storage_account_key>'
container_name='<storage_container_name>'

# If the Azure Functions 'local.settings.json' file exists, its values take
# precedence over the placeholders above.
local_settings_file_path = os.path.join(project_root_folder, 'src', 'azurefunctions', 'local.settings.json')
if os.path.exists(local_settings_file_path):
    # Read as UTF-8 explicitly instead of relying on the locale default;
    # 'utf-8-sig' also accepts the byte-order mark some editors prepend,
    # which would otherwise make json.load fail.
    with open(local_settings_file_path, 'r', encoding='utf-8-sig') as f:
        local_settings = json.load(f)

    # A missing key raises KeyError here, close to the source of the problem.
    account_name = local_settings['Values']['storage_account_name']
    account_key = local_settings['Values']['storage_account_key']
    container_name = local_settings['Values']['storage_container_name']

In [5]:
# Download all data from Azure Blob Storage and save it locally.
# The downloader is built from the storage account name, key and container
# name, and is asked to write into `data_raw_folder`.
# NOTE(review): the recorded output prints "File already downloaded: ..." per
# file, which suggests files already present are not fetched again; confirm
# in BlobDownloader.
blob_downloader = BlobDownloader(account_name, account_key, container_name)
blob_downloader.download_blobs_from_storage_and_save_to_folder(data_raw_folder)

storia_przejazdow_2019-7-15_12_24_35.csv
File already downloaded: Historia_przejazdow_2019-7-15_13_24_37.csv
File already downloaded: Historia_przejazdow_2019-7-15_14_25_35.csv
File already downloaded: Historia_przejazdow_2019-7-15_1_14_37.csv
File already downloaded: Historia_przejazdow_2019-7-15_2_15_34.csv
File already downloaded: Historia_przejazdow_2019-7-15_3_16_35.csv
File already downloaded: Historia_przejazdow_2019-7-15_4_16_39.csv
File already downloaded: Historia_przejazdow_2019-7-15_5_17_35.csv
File already downloaded: Historia_przejazdow_2019-7-15_6_18_41.csv
File already downloaded: Historia_przejazdow_2019-7-15_7_19_37.csv
File already downloaded: Historia_przejazdow_2019-7-15_8_20_35.csv
File already downloaded: Historia_przejazdow_2019-7-15_9_21_36.csv
File already downloaded: Historia_przejazdow_2019-7-16_14_26_33.csv
File already downloaded: Historia_przejazdow_2019-7-17_14_26_37.csv
File already downloaded: Historia_przejazdow_2019-7-18_14_27_41.csv
File already dow