# Generate the input matrix for a given date, temperature and rain

In [1]:
import numpy as np
import pandas as pd

import datetime

---

In [2]:
# Load the training set from a semicolon-separated CSV file.
training_set_df = pd.read_csv('../training_set.csv', sep=';')

In [3]:
training_set_df.head()

Unnamed: 0,NoTroncon,DateCreation,DayOfYear,DayOfWeek,NoHeure,TxOccupation,CenterLongitude,CenterLatitude,EmplacementCount,Temp,Precip_total_day_mm
0,1,2015-01-02,2,4,9,0.0,-73.570339,45.507996,17,-11.0,0.1
1,1,2015-01-02,2,4,10,0.0,-73.570339,45.507996,17,-11.0,0.1
2,1,2015-01-02,2,4,11,0.0,-73.570339,45.507996,17,-11.0,0.1
3,1,2015-01-02,2,4,12,0.051,-73.570339,45.507996,17,-10.0,0.1
4,1,2015-01-02,2,4,13,0.0588,-73.570339,45.507996,17,-12.0,0.1


In [4]:
# Distinct section ids (NoTroncon) found in the training set, in order of first appearance.
section_list = training_set_df['NoTroncon'].unique()

In [5]:
print(section_list)

[   1    2    3    4    5    6    7    8   10   12   19   20   21   25
   26   27   28   29   30   33  155  156  182  204  205  358  359  360
  361  362  363  364  365  366  367  402  403  404  406  407  408  409
  410  411  414  415  561  562  563  564  574  585  598  599  612  613
  614  615  616  617  618  619  620  621  890  891  892  893  894  895
  896  897  898  899  901  902  903  904  905  907  908  909  910  912
  913  914  915  916 1378 1379 1417 1419 1420 1436 1437 1438 1439 1457
 1458 1461 1462 1463 1464 1465 1466 1512 1529 1559 1567 1568 1592 1593
 1595 1596 1622 1623 1624 1625 1686 2020 2028 2031 2034 2038 2039 2276
 2284 2375 2578 2592 2593 2594 2595 2601 2612 2613 2618 2619 2626 2627
 2628 2716 2717 2718 2719 2720 2721 2732 2770 2771 2772 2773 2774 2775
 2781 2821 2874 2918 2919]


---

In [6]:
# Load the section/spot GPS table from a semicolon-separated CSV file.
section_spot_gps = pd.read_csv('../section_emplacement_gps.csv', sep=';')

In [7]:
section_spot_gps.head()

Unnamed: 0,NoTroncon,CenterLongitude,CenterLatitude,EmplacementCount,sNoEmplacement,longitude,latitude
0,1,-73.570339,45.507996,17,G406,-73.571028,45.508322
1,1,-73.570339,45.507996,17,G408,-73.570925,45.508273
2,1,-73.570339,45.507996,17,G409,-73.570889,45.508256
3,1,-73.570339,45.507996,17,G411,-73.570721,45.508175
4,1,-73.570339,45.507996,17,G412,-73.570619,45.50813


In [8]:
# Keep only the section-level columns and collapse the per-spot rows down to
# one row per distinct combination (the first occurrence wins).
section_level_columns = ['NoTroncon', 'CenterLongitude', 'CenterLatitude', 'EmplacementCount']
section_gps = section_spot_gps[section_level_columns].drop_duplicates(keep='first')

In [9]:
section_gps.head()

Unnamed: 0,NoTroncon,CenterLongitude,CenterLatitude,EmplacementCount
0,1,-73.570339,45.507996,17
17,2,-73.570659,45.507961,10
27,3,-73.569128,45.507425,6
33,4,-73.569857,45.507581,5
38,5,-73.568403,45.507083,8


In [10]:
len(section_gps)

159

---

In [11]:
def get_input_matrix(date_str, temperature, rain_mm, section_list, section_gps_df=None):
    """Build one input row per (section, hour) for a given day and weather.

    Parameters
    ----------
    date_str : str
        Date formatted as 'YYYY-MM-DD'.
    temperature : number
        Value copied into the 'Temp' column of every row.
    rain_mm : number
        Value copied into the 'Precip_total_day_mm' column of every row.
    section_list : iterable
        Section ids (values of 'NoTroncon') to generate rows for.
    section_gps_df : pandas.DataFrame, optional
        Table with the columns 'NoTroncon', 'CenterLongitude',
        'CenterLatitude' and 'EmplacementCount'. When a section id appears
        more than once, its first row is used. Defaults to the notebook-level
        ``section_gps`` frame.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: 'DayOfYear', 'DayOfWeek', 'NoHeure',
        'CenterLongitude', 'CenterLatitude', 'EmplacementCount', 'Temp',
        'Precip_total_day_mm'. Rows are grouped by section in the order of
        ``section_list``; within a section the hours run 9..21 when the date
        is Monday-Friday and 9..17 otherwise. An empty ``section_list``
        yields an empty frame that still has these columns.

    Raises
    ------
    IndexError
        If a section id is not present in the GPS table.
    ValueError
        If ``date_str`` does not describe a valid date.
    """
    columns = ['DayOfYear', 'DayOfWeek', 'NoHeure', 'CenterLongitude', 'CenterLatitude',
               'EmplacementCount', 'Temp', 'Precip_total_day_mm']

    if section_gps_df is None:
        # Fall back to the frame built earlier in the notebook.
        section_gps_df = section_gps

    # Extract day of year and day of week (Monday == 0) from the date string.
    date = datetime.date(*map(int, date_str.split('-')))
    day_of_year = date.timetuple().tm_yday
    day_of_week = date.weekday()

    # The hour range only depends on the date, so compute it once.
    hour_range = range(9, 22) if day_of_week <= 4 else range(9, 18)

    # Index the GPS table by section id once instead of scanning it for every
    # section; the first row of a duplicated id is the one that is kept.
    gps_by_section = (section_gps_df
                      .drop_duplicates(subset='NoTroncon', keep='first')
                      .set_index('NoTroncon'))

    rows_list = []
    for section_id in section_list:
        if section_id not in gps_by_section.index:
            raise IndexError(
                "section id {!r} not found in the section GPS table".format(section_id))

        # Look the values up by column name so the result does not depend on
        # the column order of the GPS table.
        center_longitude = gps_by_section.at[section_id, 'CenterLongitude']
        center_latitude = gps_by_section.at[section_id, 'CenterLatitude']
        emplacement_count = gps_by_section.at[section_id, 'EmplacementCount']

        for hour in hour_range:
            rows_list.append({
                'DayOfYear': day_of_year,
                'DayOfWeek': day_of_week,
                'NoHeure': hour,
                'CenterLongitude': center_longitude,
                'CenterLatitude': center_latitude,
                'EmplacementCount': emplacement_count,
                'Temp': temperature,
                'Precip_total_day_mm': rain_mm
            })

    # Passing `columns` fixes the column order and keeps the columns present
    # even when no rows were generated.
    return pd.DataFrame(rows_list, columns=columns)

In [12]:
# Build the input matrix for 2015-01-02 with temperature -10 and 0.5 mm of rain,
# covering every section id found in the training set.
input_matrix_df = get_input_matrix("2015-01-02", -10, 0.5, section_list)

In [13]:
input_matrix_df.head()

Unnamed: 0,DayOfYear,DayOfWeek,NoHeure,CenterLongitude,CenterLatitude,EmplacementCount,Temp,Precip_total_day_mm
0,2,4,9,-73.570339,45.507996,17,-10,0.5
1,2,4,10,-73.570339,45.507996,17,-10,0.5
2,2,4,11,-73.570339,45.507996,17,-10,0.5
3,2,4,12,-73.570339,45.507996,17,-10,0.5
4,2,4,13,-73.570339,45.507996,17,-10,0.5


In [14]:
len(input_matrix_df)

2067

In [15]:
input_matrix_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2067 entries, 0 to 2066
Data columns (total 8 columns):
DayOfYear              2067 non-null int64
DayOfWeek              2067 non-null int64
NoHeure                2067 non-null int64
CenterLongitude        2067 non-null float64
CenterLatitude         2067 non-null float64
EmplacementCount       2067 non-null int64
Temp                   2067 non-null int64
Precip_total_day_mm    2067 non-null float64
dtypes: float64(3), int64(5)
memory usage: 129.3 KB


---

In [16]:
input_matrix_df.values

array([[  2. ,   4. ,   9. , ...,  17. , -10. ,   0.5],
       [  2. ,   4. ,  10. , ...,  17. , -10. ,   0.5],
       [  2. ,   4. ,  11. , ...,  17. , -10. ,   0.5],
       ...,
       [  2. ,   4. ,  19. , ...,   2. , -10. ,   0.5],
       [  2. ,   4. ,  20. , ...,   2. , -10. ,   0.5],
       [  2. ,   4. ,  21. , ...,   2. , -10. ,   0.5]])

In [17]:
input_matrix_df.values.shape

(2067, 8)