# About
This notebook will produce all the gas data for step 1.1A of the data processing for Module 1.

**Required user input**

Set `year` in the cell below to each of the full years 2019, 2020 and 2021 in turn, and run the entire notebook once for each year.


In [None]:
# Year of data to process. Keep it as a string: it is used to build the date
# range and the output names in the next cell. Change it and re-run the whole
# notebook for each year you need.
year = '2021' # Update year - this is the year of data you are working on.

In [None]:
# Don't change these.
# Directory passed to both save_data calls further down.
output_directory = 'Step_1_1_Outputs'

# First and last calendar day of the selected year, as YYYY-MM-DD strings.
first_date = f'{year}-01-01'
last_date = f'{year}-12-31'

# Output name for the daily data. Shouldn't include '.csv'.
daily_filename = f'Step_1_1A_Gas_{year}_daily'
# Output name for the half-hourly data, which is saved with
# save_method='per_home'. Shouldn't include '.csv'.
# NOTE(review): the variable name suggests this ends up as a sub-directory
# holding one file per home - confirm against SerlDataSelector.save_data.
hh_subdir = f'Step_1_1A_Gas_{year}_hh'

# Code

In [None]:
import os
import pandas as pd
from data_picker_edition_4_v01 import SerlDataSelector
import locations
import datetime
# Local wall-clock time of this run, to the minute.
now = f"{datetime.datetime.now():%Y-%m-%d %H:%M}"
# Report the Observatory data version and the run time alongside the outputs.
print(f"Run with Observatory data version: {locations.serl_data_version}\nRun on {now}")

## Daily data 

In [None]:
# Initialise the data picker used for the daily data.
# NOTE(review): no argument is passed here, whereas the half-hourly section
# constructs SerlDataSelector('hh') - presumably the constructor default suits
# daily data; confirm.
selector = SerlDataSelector()

In [None]:
# Columns requested from the daily data.
daily_usecols = [
    'PUPRN',
    'Read_date_effective_local',
    'Valid_read_time',
    'Gas_sum_match',
    'Gas_flag',
    'Gas_d_kWh',
    'Gas_hh_sum_m3',
]

# Load the daily data for the selected year (first_date to last_date).
selector.load_data(
    res='daily',
    usecols=daily_usecols,
    filter_Valid_read_time=True,
    first_date=first_date,
    last_date=last_date,
    inc_time_change_days=True,
    add_sum_gas_column=True,
)

# Save what was loaded, using the 'single_file' save method.
selector.save_data(
    output_filename=daily_filename,
    output_directory=output_directory,
    save_method='single_file',
)

## Half-hourly data

### Load the data with the data picker

In [None]:
# Initialise a fresh data picker for the half-hourly ('hh') data. This rebinds
# `selector`, replacing the instance used for the daily data above.
selector = SerlDataSelector('hh')

In [None]:
# Columns requested from the half-hourly data.
hh_usecols = [
    'PUPRN',
    'Read_date_time_UTC',
    'Read_date_effective_local',
    'Read_date_time_local',
    'Valid_read_time',
    'Gas_flag',
    'Gas_hh_Wh',
]

# Load the half-hourly data for the selected year (first_date to last_date),
# filtering on Gas_flag with the value list [1].
selector.load_data(
    res='hh',
    usecols=hh_usecols,
    filter_Gas_flag=[1],
    filter_Valid_read_time=True,
    first_date=first_date,
    last_date=last_date,
    inc_time_change_days=True,
    add_local_time_cols=False,
)

# Save what was loaded, using the 'per_home' save method.
selector.save_data(
    output_filename=hh_subdir,
    output_directory=output_directory,
    save_method='per_home',
)