# DataUt 
## Extracting data from Datainn and putting it to use in Pandas dataframes

In [1]:
# Import of needed libraries

# Libraries for retrieving data from the GraphQL API
import urllib3
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport

# Imports for plotting
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
plt.rcParams["figure.figsize"] = [14, 7]

import seaborn as sns
# Use seaborn style defaults and set the default figure size
sns.set(rc={'figure.figsize':(14, 7)})
sns.set_context("paper")

# Magic stuff from the Pandas people, flattening the JSON responses into something that can pass as a table
from pandas.io.json import json_normalize

# Tools to work with dates
from datetime import datetime
from dateutil.relativedelta import relativedelta
import pytz


# Remove warnings related to HTTPS, no need as data is public
urllib3.disable_warnings()

# And finally the Pandas library for working with the data
import pandas as pd

# Fixing file names that include weird characters
from slugify import slugify


In [2]:
# Setup of the GQL client to talk to the NPRA Datainn API
#
# The transport sends JSON requests to the trafikkdata GraphQL endpoint.
# NOTE(review): verify=False switches off TLS certificate verification for
# every request made through this transport; it goes hand in hand with the
# urllib3.disable_warnings() call in the import cell. Confirm it is still needed.

sample_transport=RequestsHTTPTransport(
    url='https://www.vegvesen.no/trafikkdata/api/',
    use_json=True,
    headers={
        "Content-type": "application/json",
    },
    verify=False
)

# `client` is read as a module-level global by get_day().
# NOTE(review): fetch_schema_from_transport=True presumably downloads the API
# schema when the client is created - confirm against the installed gql version.
client = Client(
    retries=3,
    transport=sample_transport,
    fetch_schema_from_transport=True,
)

In [3]:
# Function to extract daily volumes from one day to the other

def get_day(startday, stopday, point_id):
    """Fetch daily traffic volumes for one registration point.

    The half-open period [startday, stopday) is walked in one-day steps and a
    separate GraphQL query is sent for every step through the module-level
    ``client``. That brute-forces around GraphQL pagination at the cost of
    speed (one HTTP round trip per day).

    Parameters
    ----------
    startday, stopday : str
        ISO 8601 timestamps understood by ``datetime.fromisoformat``,
        e.g. "2010-01-01T00:00:00+02:00".
    point_id : str
        Traffic registration point id, e.g. "35002V72811".

    Returns
    -------
    pandas.DataFrame
        Columns ``from``, ``to``, ``daily_volume``, ``coverage`` and ``csum``
        (running total of ``daily_volume``), indexed by ``from`` (which is
        also kept as a column). When no day returned any row the frame is
        empty but still has these columns.

    Raises
    ------
    Whatever ``client.execute`` or ``json_normalize`` raise, e.g. on network
    errors or an unexpected response shape; nothing is caught here.
    """
    column_names = ['from', 'to', 'daily_volume', 'coverage']

    query_text_template = '''{trafficData(trafficRegistrationPointId: "{point_id}") {
        volume {
          byDay(from: "{text_page_start}", to: "{text_page_end}") {
            edges {
              node {
                from
                to
                total {
                  volumeNumbers {
                    volume
                  }
                  coverage {
                    percentage
                  }
                }
              }
            }
          }
        }
      }
    }'''

    # str.replace instead of str.format: the GraphQL text is full of literal braces.
    query_text_template = query_text_template.replace("{point_id}", point_id)

    page_start = datetime.fromisoformat(startday)
    final_stop = datetime.fromisoformat(stopday)

    # Collect the per-day frames and concatenate once at the end; growing a
    # DataFrame with concat inside the loop copies all earlier rows every time.
    daily_frames = []
    while page_start < final_stop:
        page_stop = page_start + relativedelta(days=1)

        # Enter start and end timestamps into the query text
        query = (
            query_text_template
            .replace("{text_page_start}", page_start.isoformat(sep='T'))
            .replace("{text_page_end}", page_stop.isoformat(sep='T'))
        )

        # Run the query and flatten the edges into a temporary dataframe
        data = client.execute(gql(query))
        temp_df = json_normalize(data, record_path=['trafficData', 'volume', 'byDay', 'edges'])

        # Days without data come back as an empty frame; skip them
        if len(temp_df.index) > 0:
            # Columns are renamed by position, following the field order of the query.
            temp_df.columns = column_names
            daily_frames.append(temp_df)

        # Move to the next day
        page_start = page_stop

    if daily_frames:
        df = pd.concat(daily_frames)
    else:
        df = pd.DataFrame(columns=column_names)

    # Force a numeric dtype so the running total works even when a day has no
    # volume value (presumably returned as null by the API - such days become
    # NaN and are skipped by cumsum).
    df['daily_volume'] = pd.to_numeric(df['daily_volume'], errors='coerce')

    df = df.set_index('from', drop=False)
    df['csum'] = df['daily_volume'].cumsum()
    return df



In [4]:

def plott_anual_comparison_to_pdf(startdate, enddate, point, point_name, filename):
    '''
    Plot the cumulative daily traffic volume of a period against the same
    period one year earlier and save the figure as a PDF.

    startdate, enddate: datetime objects delimiting the period. Naive values
        are interpreted as Europe/Oslo local time; values that already carry
        a timezone are used as they are.
    point: traffic registration point id, example: "35002V72811"
    point_name: text used in the figure title (together with the point id)
    filename: base name of the output file; it is slugified and written to
        ./results/<slug>.pdf (the directory is created when missing)

    Returns nothing. Errors from get_day(), plotting or saving propagate to
    the caller; the figure is closed in every case.
    '''
    # Local import so this block does not depend on edits to the import cell.
    import os

    timezone = pytz.timezone("Europe/Oslo")

    # pytz' localize() raises ValueError on datetimes that already have tzinfo.
    if startdate.tzinfo is None:
        startdate = timezone.localize(startdate)
    if enddate.tzinfo is None:
        enddate = timezone.localize(enddate)

    previous_start = startdate - pd.DateOffset(years=1)
    previous_end = enddate - pd.DateOffset(years=1)

    volume_current = get_day(startdate.isoformat(sep='T'), enddate.isoformat(sep='T'), point)
    volume_previous = get_day(previous_start.isoformat(sep='T'), previous_end.isoformat(sep='T'), point)

    # The 'from' strings can carry different UTC offsets on either side of a
    # daylight-saving switch; parsing with utc=True gives one proper datetime
    # dtype, which is then shown in Oslo local time again.
    volume_current['fra'] = pd.to_datetime(volume_current['from'], utc=True).dt.tz_convert(timezone)
    volume_current = volume_current.set_index('fra', drop=False)

    # Shift the previous year's data one year forward so both curves share the x axis
    volume_previous['fra'] = (
        pd.to_datetime(volume_previous['from'], utc=True).dt.tz_convert(timezone)
        + pd.DateOffset(years=1)
    )
    volume_previous = volume_previous.set_index('fra', drop=False)

    # Figure size in inches: 11.7 x 8.3 is A4 landscape
    fig, ax = plt.subplots(figsize=(11.7, 8.3))
    try:
        # Legend labels follow the requested period instead of fixed years
        ax.plot(volume_current['csum'], label='{} Data'.format(startdate.year))
        ax.plot(volume_previous['csum'], label='{} Data'.format(previous_start.year))

        # Titles, axis labels and legend
        fig.suptitle(point_name + '  -  ' + point)
        ax.set_title("Statens vegvesen - Transportutvikling, Transportstyring")
        ax.set_xlabel('Dato')
        ax.set_ylabel("Total antall kjøretøy")
        ax.legend()

        # savefig does not create missing directories
        os.makedirs('./results', exist_ok=True)
        f_name = './results/' + slugify(filename) + '.pdf'
        fig.savefig(f_name)
    finally:
        # Always release the figure; this function is called in a long loop.
        plt.close(fig)


In [5]:
# Comparison period handed to plott_anual_comparison_to_pdf.
start =  datetime(2020, 1, 1)
end =   datetime(2020, 4, 1)
# Single-point example, left disabled. The function takes five arguments; the
# last one is the base name of the output file.
#plott_anual_comparison_to_pdf(start, end, "35002V72811", "Trondheim", "Trondheim")

In [6]:
# Restore the list of vehicle counting points saved by the Vehicle_Counting_point notebook.
# %store -r reads a variable that was saved earlier with %store, so that
# notebook has to be run first.
# NOTE: the name is spelled "cointing" on purpose here - it has to match the stored name.

%store -r vehicle_cointing_points
vehicle_cointing_points.head()

Unnamed: 0,id,name,lat,lon,road_reference,road_category
0,56100V804816,RAMPE SØRÅS MOT HOP,60.325803,5.337152,EV39 S78D1 m6434 KD3 m172,Europaveg
1,03486V319647,ØLEN/ETNE,59.623114,5.886108,EV134 S4D1 m17825,Europaveg
2,20318V625294,FESTNINGTUNNEL,59.91118,10.724291,EV18 S55D1 m5076,Europaveg
3,00030V705190,MATRAND S,60.020618,12.113421,RV2 S2D1 m4510,Riksveg
4,29614V805708,SOTRABRUA VEST,60.372022,5.155573,RV555 S1D1 m11739,Riksveg


In [7]:

# Create one comparison PDF per vehicle counting point.
# A failure for one point is reported and the loop carries on with the next.

start = datetime(2020, 1, 1)
end = datetime(2020, 4, 1)

# iterrows() does not rely on the frame having a default 0..n-1 index
for _, point in vehicle_cointing_points.iterrows():

    point_id = point["id"]
    name = point["name"]
    road_reference = point["road_reference"]

    print('{}  -  {}'.format(point_id, name))
    try:
        title = name + '\n' + road_reference
        # NOTE(review): there is no separator between road reference and name;
        # kept as it is so already generated file names do not change.
        file_name = road_reference + '' + name
        plott_anual_comparison_to_pdf(start, end, point_id, title, file_name)
    except Exception as error:
        # Exception (not a bare except) so Ctrl-C / kernel interrupt still stops
        # the batch; the reason is printed to tell API problems from plot or
        # file problems.
        print("Data extraction failed for point: {} ({}: {})".format(
            point_id, type(error).__name__, error))

56100V804816  -  RAMPE SØRÅS MOT HOP



To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


03486V319647  -  ØLEN/ETNE
20318V625294  -  FESTNINGTUNNEL
00030V705190  -  MATRAND S
29614V805708  -  SOTRABRUA VEST
02087V625292  -  EV 18 V/ MASTEMYR
62464V2725991  -  Pårampe fra Eiganestunellen mot Hundvåg
76092V705200  -  ALVDAL
31504V578608  -  SNÅSAHEIA
42004V22151  -  Sprøkilen
70741V384496  -  NAUSTDALSTUNNELEN
05960V384024  -  REED VEST
63227V885182  -  Skitdalshøgda
37055V971362  -  MOENKRYSSET ØST
10239V2725979  -  Hundvågtunellen fra Gamlingen mot Hundvåg
58272V2583663  -  Granstunnelen sørgående
21565V1060643  -  JORA
27436V2172082  -  HANEKLEIV SYD
80565B1689290  -  Nenset sykkeltellepunkt
22500V805651  -  HOVLAND
73380V121327  -  Rampe mot Prestheia
53155V1665289  -  RULLESTADTUNNELEN
97687V72808  -  Horg
27227V885156  -  Trældaltunnelen
72741V1811592  -  DALKRYSSET AVRAMPE
78805V2347241  -  REVET
21381V2554057  -  JAREN
14551V705197  -  NORDSTUMOEN S
71241V2460301  -  Kong Håkon 5.s gt Nordgående
18523V2282344  -  Ørbekk Nordgående Rampe
19690V804744  -  NYGÅRDST.SENT

12357V625212  -  Klemetsrud
84237V578097  -  TEVELDALEN
26714V180853  -  STRØMSÅSTUNNELEN
31290V625405  -  Hovedkjørefelt Rv150 Grefsen
66346V625214  -  E6 Ulvensplitten Rampe Alnabru Teisen
92486V1126283  -  LANGMYRA NORD
86022V521170  -  Høgenheitunnelen
65823V1668921  -  FJØSDALEN
91007V578610  -  Gartland mot Harran
46725V3026132  -  LOFTESNES
68557V444232  -  BLOMMENHOLM
68733V2078106  -  RAMPE NYBORG-VÅGSBOTN
49453V930259  -  BARTA
37885V1204218  -  BORGENKRYSSET AVRAMPE
21768V805002  -  TORBORG NEDREAASGT.
92019B1685807  -  E6 Kirkenes Sykkel
75774V1060655  -  SMEDMOEN
87610V1811579  -  DALKRYSSET
89879V1060627  -  ILKA (E6)
15060V2726065  -  Eiganestunnelen nordgående løp
45557V625215  -  E6 Furuset Sydgående Hovedfelt
39245V319652  -  Rossabø
20829V521117  -  MANNEBRU
62393V72804  -  Oppdal E6 nordlig
52148V1797330  -  STØYLSNESTUNNELEN
39643V2725983  -  Ryfylketunellen mot Stavanger 4
76587V3152664  -  Pårampe fra Montebello
54236V384508  -  HAUGEN
Data extraction failed for 

85389V805037  -  STORAVATN (RV555 RAMPE 34)
57929V705247  -  ØYERMOEN
43040V705210  -  HASLEMOSLETTA
99229V521431  -  Steane N
17330V930335  -  Masi
92394V885954  -  E10 Higrav Leirvikhaugen
07872V2570034  -  VISPERUD AVRAMPE
83528V1773686  -  VØYEN
79902V804834  -  FLØYFJELL-VESTRE
74031V1857387  -  LYSAKERLOKKET RAMPE
63839B2077721  -  Bøgata sykkeltellepunkt
65484V930593  -  TRANSFARELV
26266V443149  -  E6 NYE MOSS NORD
94013V521403  -  Søve
66883V930281  -  SOLHEIMSLIA
67055V804829  -  DANMPLASS-VESTRE
05120V625649  -  E6 Alnabru Nordgående Helsfyr Furuset
95559B383662  -  LOFTESNES (SYKKEL)
16868V805119  -  LAGUNEN-TROLDHAUGTUN. SØRÅS
00902V578622  -  Hegra øst
68234V443152  -  TARALDRUD
14061V443595  -  RV159 VAREMESSA AVRAMPE
00509V885112  -  NORDLANDSPORTEN
19085V885118  -  FELLINGFORS
87318V521440  -  Klevstrand  
56025V2078107  -  RAMPE ÅSANEVN-NYBORG
46321V444232  -  RAMSTADSLETTA
14340V181285  -  V/AVKJ. VIKERSUND S
43259V2078119  -  RAMPE ÅSAMYRANE-ÅSANEVEIEN
49929V1728794

72532V1060043  -  LESJAVERK (E136)
59192V1878201  -  Bjørvikatunell Vestgående
77347V930245  -  E6 Gjøkenes (Porsanger)
31619V320585  -  GRINDE NORD
64432B2409911  -  Tverlandsbrua/Vikan sykkel
46802V320601  -  BERGE SØR
76552V72244  -  Hemnekjølen
89040V2545188  -  Kongshavn 1,2
51143V319682  -  Nesflaten
92246V2682281  -  Follotunnelen nordgående
15862V72153  -  MELHUS NORD
09588V1751352  -  TINGSAKER-X E18
99160V930279  -  HØYBUKTMOEN E6
25641V1699273  -  ROMARHEIMSDALEN 
16488V249020  -  Futura ATK
90500V805707  -  HARAFJELLTUNNELEN
58509V804762  -  RAMPE ARNA TIL OSTERØY
51451V2675461  -  Tjernfjelltunnelen
08989V2586412  -  ISAKBEKKEN
01777V885181  -  BJØRNFJELL
90697V625213  -  E6 Alnabru Nordgående Ulven Furuset
39256V181280  -  SPIKKESTAD
83441V578079  -  Branes Ø arm Levanger-Verdal
32088V444219  -  HVAM SØR
57430V319659  -  Raglamyr avkj. rampe fra Aksdal
98144V2554057  -  ELVESTAD VEST
69326V1060101  -  SANDBU (RV15)
21571V2394246  -  Strindheimtunnelen mot Leangen
57064V80

03829V804778  -  RAMPE HOP MOT SENTR
99781V2303021  -  Eikeberget
41909V2583664  -  Granstunnelen nordgående
91582V930281  -  E6 Kirkenes X FV367 Vest
03398V249527  -  Glamoxbrua
52685V444218  -  Nordbytunellen
52742V2282262  -  ØRBEKK EV6
26685V704589  -  BREVAD
22230V121755  -  SVINDLAND
90523V804810  -  STORD-HEIANE/SAGVÅG
23045V2441493  -  ARM STORD-BØMLO
66206V805614  -  RØYKENES
63904V885194  -  E10 Vaterfjorden
04883V1704241  -  Verdal arm Verdalsøra-Steinkjer
37773V625378  -  AVKJ. FRA RV 4
61487V319872  -  Risavika
60900V248916  -  Molde Fk.
39676V885922  -  SØRELVA
60757V384046  -  SUNDAFJELLTUNNELEN
10095V2785540  -  R94 SKAIDI
11138B1926267  -  HAFSTAD VEST (SYKKEL)
18512V1665976  -  BORGENKRYSSET
91810V2497857  -  Vågstrandstunnelen
00875V249529  -  Eidssetra
35294V249020  -  Vikansvingen
72406V625599  -  E6 Lodalen Rampe Helsfyr Lodalen
74801V2676825  -  Thallerkrysser
96715V521387  -  Morgedal
14875V249119  -  Flyplasskrysset
84578V319700  -  Slettebø
77027V1207593  -  R