## Because the original file (dft_traffic_counts_raw_counts.csv) was too large, we converted it to a Parquet file to save storage space

### The original file can still be downloaded from: https://roadtraffic.dft.gov.uk/downloads

In [8]:
import pandas as pd;
import os;

In [9]:
def get_file_stats(path):
    """Print the size of the file located at ``path``, in megabytes.

    The byte count reported by the operating system is divided by
    1024 * 1024 before being printed, so the figure is not rounded.

    Arguments:
        path (str): File path.

    Returns:
        None. The size is only printed, not returned.
    """
    bytes_per_megabyte = 1024 * 1024
    size_in_bytes = os.path.getsize(path)
    print(f'File Size in MegaBytes is {size_in_bytes / bytes_per_megabyte}')

In [10]:
# Location of the raw traffic-counts CSV (Windows-style relative path).
file_path = r'aux_files\source\UK_Traffic_Data\dft_traffic_counts_raw_counts.csv'

# Report how large the CSV is on disk.
get_file_stats(file_path)

# Load every column as text (dtype=str), then swap any literal "nan" strings
# for pd.NA so they are stored as nulls rather than as text.
# NOTE(review): read_csv's default NA parsing may already turn "nan" cells into
# missing values, which would make this replace a no-op -- confirm.
df = (
    pd.read_csv(file_path, header=0, sep=',', dtype=str)
    .replace('nan', pd.NA)
)
df

File Size in MegaBytes is 1208.8800992965698


Unnamed: 0,Count_point_id,Direction_of_travel,Year,Count_date,hour,Region_id,Region_name,Region_ons_code,Local_authority_id,Local_authority_name,...,Buses_and_coaches,LGVs,HGVs_2_rigid_axle,HGVs_3_rigid_axle,HGVs_4_or_more_rigid_axle,HGVs_3_or_4_articulated_axle,HGVs_5_articulated_axle,HGVs_6_articulated_axle,All_HGVs,All_motor_vehicles
0,749,E,2014,2014-06-25 00:00:00,7,3,Scotland,S92000003,39,East Ayrshire,...,5,31,15,4,4,4,13,12,52,935
1,749,E,2014,2014-06-25 00:00:00,8,3,Scotland,S92000003,39,East Ayrshire,...,7,103,29,3,2,7,18,20,79,1102
2,749,E,2014,2014-06-25 00:00:00,9,3,Scotland,S92000003,39,East Ayrshire,...,14,88,21,3,5,9,17,19,74,773
3,749,E,2014,2014-06-25 00:00:00,10,3,Scotland,S92000003,39,East Ayrshire,...,8,90,31,6,10,7,18,17,89,778
4,749,E,2014,2014-06-25 00:00:00,11,3,Scotland,S92000003,39,East Ayrshire,...,18,75,38,2,2,3,16,24,85,875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4815499,811618,E,2020,2020-06-05 00:00:00,14,3,Scotland,S92000003,31,Perth & Kinross,...,1,8,0,0,0,0,0,0,0,57
4815500,811618,E,2020,2020-06-05 00:00:00,15,3,Scotland,S92000003,31,Perth & Kinross,...,0,14,0,0,0,0,0,0,0,74
4815501,811618,E,2020,2020-06-05 00:00:00,16,3,Scotland,S92000003,31,Perth & Kinross,...,0,15,2,0,0,0,0,1,3,69
4815502,811618,E,2020,2020-06-05 00:00:00,17,3,Scotland,S92000003,31,Perth & Kinross,...,1,8,0,0,0,0,0,0,0,73


In [11]:
# Persist the loaded DataFrame as a snappy-compressed Parquet file.
df.to_parquet(
    r'aux_files\source\UK_Traffic_Data\dft_traffic_counts_raw_counts.parquet',
    compression='snappy',
)

### Checking that the Parquet file was written correctly and can be read back

In [12]:
# Point at the Parquet copy of the dataset.
file_path = r'aux_files\source\UK_Traffic_Data\dft_traffic_counts_raw_counts.parquet'

# Report the Parquet file's size on disk.
get_file_stats(file_path)

# Read the Parquet file back into a DataFrame and display it for a visual check.
df = pd.read_parquet(file_path)
df

File Size in MegaBytes is 62.42549800872803


Unnamed: 0,Count_point_id,Direction_of_travel,Year,Count_date,hour,Region_id,Region_name,Region_ons_code,Local_authority_id,Local_authority_name,...,Buses_and_coaches,LGVs,HGVs_2_rigid_axle,HGVs_3_rigid_axle,HGVs_4_or_more_rigid_axle,HGVs_3_or_4_articulated_axle,HGVs_5_articulated_axle,HGVs_6_articulated_axle,All_HGVs,All_motor_vehicles
0,749,E,2014,2014-06-25 00:00:00,7,3,Scotland,S92000003,39,East Ayrshire,...,5,31,15,4,4,4,13,12,52,935
1,749,E,2014,2014-06-25 00:00:00,8,3,Scotland,S92000003,39,East Ayrshire,...,7,103,29,3,2,7,18,20,79,1102
2,749,E,2014,2014-06-25 00:00:00,9,3,Scotland,S92000003,39,East Ayrshire,...,14,88,21,3,5,9,17,19,74,773
3,749,E,2014,2014-06-25 00:00:00,10,3,Scotland,S92000003,39,East Ayrshire,...,8,90,31,6,10,7,18,17,89,778
4,749,E,2014,2014-06-25 00:00:00,11,3,Scotland,S92000003,39,East Ayrshire,...,18,75,38,2,2,3,16,24,85,875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4815499,811618,E,2020,2020-06-05 00:00:00,14,3,Scotland,S92000003,31,Perth & Kinross,...,1,8,0,0,0,0,0,0,0,57
4815500,811618,E,2020,2020-06-05 00:00:00,15,3,Scotland,S92000003,31,Perth & Kinross,...,0,14,0,0,0,0,0,0,0,74
4815501,811618,E,2020,2020-06-05 00:00:00,16,3,Scotland,S92000003,31,Perth & Kinross,...,0,15,2,0,0,0,0,1,3,69
4815502,811618,E,2020,2020-06-05 00:00:00,17,3,Scotland,S92000003,31,Perth & Kinross,...,1,8,0,0,0,0,0,0,0,73
