In [10]:
import pandas as pd
import mpu
import neo4j
from neo4j import GraphDatabase as G_DB
import py2neo
from scipy.spatial import KDTree as KD
import json
import os

In [11]:
# Load the merged fire records, then immediately write them back out to
# ./data/test.csv (default to_csv settings, so the row index is written too).
fires = pd.read_csv(filepath_or_buffer='./data/fires_merged.csv')
fires.to_csv(path_or_buf='./data/test.csv')

STN---  1-6       Int.   Station number (WMO/DATSAV3 number)
                         for the location.

WBAN    8-12      Int.   WBAN number where applicable--this is the
                         historical "Weather Bureau Air Force Navy"
                         number - with WBAN being the acronym.

YEAR    15-18     Int.   The year.

MODA    19-22     Int.   The month and day.

TEMP    25-30     Real   Mean temperature for the day in degrees
                         Fahrenheit to tenths.  Missing = 9999.9
Count   32-33     Int.   Number of observations used in 
                         calculating mean temperature.

DEWP    36-41     Real   Mean dew point for the day in degrees
                         Fahrenheit to tenths.  Missing = 9999.9
Count   43-44     Int.   Number of observations used in 
                         calculating mean dew point.  

SLP     47-52     Real   Mean sea level pressure for the day
                         in millibars to tenths.  Missing =       
                         9999.9
Count   54-55     Int.   Number of observations used in 
                         calculating mean sea level pressure.

STP     58-63     Real   Mean station pressure for the day
                         in millibars to tenths.  Missing =       
                         9999.9
Count   65-66     Int.   Number of observations used in 
                         calculating mean station pressure.  

VISIB   69-73     Real   Mean visibility for the day in miles
                         to tenths.  Missing = 999.9
Count   75-76     Int.   Number of observations used in 
                         calculating mean visibility.      

WDSP    79-83     Real   Mean wind speed for the day in knots
                         to tenths.  Missing = 999.9 
Count   85-86     Int.   Number of observations used in 
                         calculating mean wind speed.

MXSPD   89-93     Real   Maximum sustained wind speed reported 
                         for the day in knots to tenths.
                         Missing = 999.9

GUST    96-100    Real   Maximum wind gust reported for the day
                         in knots to tenths.  Missing = 999.9

MAX     103-108   Real   Maximum temperature reported during the 
                         day in Fahrenheit to tenths--time of max 
                         temp report varies by country and        
                         region, so this will sometimes not be    
                         the max for the calendar day.  Missing = 
                         9999.9     
Flag    109-109   Char   Blank indicates max temp was taken from the
                         explicit max temp report and not from the              
                         'hourly' data.  * indicates max temp was 
                         derived from the hourly data (i.e., highest
                         hourly or synoptic-reported temperature).

MIN     111-116   Real   Minimum temperature reported during the 
                         day in Fahrenheit to tenths--time of min 
                         temp report varies by country and        
                         region, so this will sometimes not be  
                         the min for the calendar day.  Missing = 
                         9999.9
Flag    117-117   Char   Blank indicates min temp was taken from the
                         explicit min temp report and not from the              
                         'hourly' data.  * indicates min temp was 
                         derived from the hourly data (i.e., lowest
                         hourly or synoptic-reported temperature).

PRCP    119-123   Real   Total precipitation (rain and/or melted
                         snow) reported during the day in inches
                         and hundredths; will usually not end 
                         with the midnight observation--i.e., 
                         may include latter part of previous day.
                         .00 indicates no measurable              
                         precipitation (includes a trace).        
                         Missing = 99.99
                         Note:  Many stations do not report '0' on
                         days with no precipitation--therefore,  
                         '99.99' will often appear on these days.
                         Also, for example, a station may only
                         report a 6-hour amount for the period 
                         during which rain fell.
                         See Flag field for source of data.
Flag    124-124   Char   A = 1 report of 6-hour precipitation 
                             amount.
                         B = Summation of 2 reports of 6-hour 
                             precipitation amount.
                         C = Summation of 3 reports of 6-hour 
                             precipitation amount.
                         D = Summation of 4 reports of 6-hour 
                             precipitation amount.
                         E = 1 report of 12-hour precipitation
                             amount.
                         F = Summation of 2 reports of 12-hour
                             precipitation amount.
                         G = 1 report of 24-hour precipitation
                             amount.
                         H = Station reported '0' as the amount
                             for the day (e.g., from 6-hour reports),
                             but also reported at least one
                             occurrence of precipitation in hourly
                             observations--this could indicate a
                             trace occurred, but should be considered
                             as incomplete data for the day.
                         I = Station did not report any precip data
                             for the day and did not report any
                             occurrences of precipitation in its hourly
                             observations--it's still possible that
                             precip occurred but was not reported.

SNDP    126-130   Real   Snow depth in inches to tenths--last     
                         report for the day if reported more than
                         once.  Missing = 999.9
                         Note:  Most stations do not report '0' on
                         days with no snow on the ground--therefore,
                         '999.9' will often appear on these days.

FRSHTT  133-138   Int.   Indicators (1 = yes, 0 = no/not          
                         reported) for the occurrence during the 
                         day of:
                         Fog ('F' - 1st digit).
                         Rain or Drizzle ('R' - 2nd digit).
                         Snow or Ice Pellets ('S' - 3rd digit).
                         Hail ('H' - 4th digit).
                         Thunder ('T' - 5th digit).
                         Tornado or Funnel Cloud ('T' - 6th       
                         digit).

In [12]:
# Read the station-history table that ships alongside the GSOD data.
weather_stations = pd.read_csv(
    filepath_or_buffer='./data/noaa-global-surface-summary-of-the-day/isd-history.csv'
)

In [13]:
# Preview the first five rows of the station table.
weather_stations.head(n=5)

Unnamed: 0,USAF,WBAN,STATION NAME,CTRY,STATE,ICAO,LAT,LON,ELEV(M),BEGIN,END
0,7018,99999,WXPOD 7018,,,,0.0,0.0,7018.0,20110309,20130730
1,7026,99999,WXPOD 7026,AF,,,0.0,0.0,7026.0,20120713,20170822
2,7070,99999,WXPOD 7070,AF,,,0.0,0.0,7070.0,20140923,20150926
3,8260,99999,WXPOD8270,,,,0.0,0.0,0.0,19960101,20100731
4,8268,99999,WXPOD8278,AF,,,32.95,65.567,1156.7,20100519,20120323


In [14]:
# Fixed-width column boundaries for the GSOD daily files, expressed as
# 0-based half-open [start, end) pairs. They mirror the 1-based inclusive
# positions in the format description above; the "Count" and "Flag" fields
# are deliberately not covered by any pair.
colspecs = [
    [0, 6],      # STN---  (cols 1-6)
    [7, 12],     # WBAN    (cols 8-12)
    [14, 18],    # YEAR    (cols 15-18)
    [18, 20],    # month half of MODA (cols 19-20)
    [20, 22],    # day half of MODA   (cols 21-22)
    [24, 30],    # TEMP    (cols 25-30)
    [35, 41],    # DEWP    (cols 36-41)
    [46, 52],    # SLP     (cols 47-52)
    [57, 63],    # STP     (cols 58-63)
    [68, 73],    # VISIB   (cols 69-73)
    [78, 83],    # WDSP    (cols 79-83)
    [88, 93],    # MXSPD   (cols 89-93)
    [95, 100],   # GUST    (cols 96-100)
    [102, 108],  # MAX     (cols 103-108)
    [110, 116],  # MIN     (cols 111-116)
    [118, 123],  # PRCP    (cols 119-123)
    [125, 130],  # SNDP    (cols 126-130)
    # FRSHTT (cols 133-138) is split into one column per indicator digit.
    [132, 133],
    [133, 134],
    [134, 135],
    [135, 136],
    [136, 137],
    [137, 138],
]

# Mapping from raw column labels to readable names. WBAN has no entry.
# NOTE(review): 'MO', 'DA' and 'T.1' are presumably the labels pandas produces
# when the header line is sliced with the colspecs above -- confirm against a
# parsed file before relying on this mapping.
name_dict = {
    'STN---': 'Station',
    'YEAR': 'Year',
    'MO': 'Month',
    'DA': 'Day',
    'TEMP': 'Temp',
    'DEWP': 'DewPoint',
    'SLP': 'SeaLevelPressure',
    'STP': 'StationPressure',
    'VISIB': 'Visibility',
    'WDSP': 'Windspeed',
    'MXSPD': 'MaxWindspeed',
    'GUST': 'Gust',
    'MAX': 'MaxTemp',
    'MIN': 'MinTemp',
    'PRCP': 'Precip',
    'SNDP': 'SnowDepth',
    'F': 'Fog',
    'R': 'Rain',
    'S': 'Snow',
    'H': 'Hail',
    'T': 'Thunder',
    'T.1': 'FunnelCloud',
}

In [28]:
# Build one CSV per year (2009-2015) from the GSOD fixed-width station files.
#
# For each year folder `gsod_<year>`, the first sub-directory found is scanned
# and every file whose name starts with a two-digit prefix in 70..79 is parsed
# with `colspecs` and stacked into a single frame, which is then written to
# `global_weather_data_<year>.csv`.
#
# Implementation notes:
# * Paths are joined explicitly instead of using os.chdir, so the kernel's
#   working directory is never modified and the cell can be re-run safely
#   even after a failure part-way through.
# * Per-station frames are collected in a list and concatenated once per year;
#   growing a DataFrame with .append in a loop is quadratic and
#   DataFrame.append no longer exists in pandas >= 2.0.
# * Output is written to ./data, like the other cells, rather than to a
#   machine-specific absolute path.
#   NOTE(review): this assumes the notebook runs from the repository root, so
#   that ./data is the same folder the old absolute path pointed to -- confirm.
GSOD_ROOT = os.path.join('.', 'data', 'noaa-global-surface-summary-of-the-day', 'gsod_all_years')
OUTPUT_DIR = os.path.join('.', 'data')

# Two-character file-name prefixes that select the stations to keep.
stations_US = [str(x) for x in range(70, 80)]

for year_num in range(2009, 2016):
    year_dir = os.path.join(GSOD_ROOT, 'gsod_' + str(year_num))
    station_frames = []

    for sf in os.listdir(year_dir):
        station_dir = os.path.join(year_dir, sf)
        # Skip archives and any other plain files sitting next to the
        # extracted directory.
        if '.gz' in sf or not os.path.isdir(station_dir):
            continue

        for f in os.listdir(station_dir):
            if f[0:2] in stations_US:
                station_df = pd.read_fwf(os.path.join(station_dir, f), colspecs=colspecs)
                station_frames.append(station_df)

        # Only the first directory in each year folder is read.
        # NOTE(review): kept from the original cell; confirm that each year
        # folder really holds a single extracted directory, otherwise data
        # from the remaining directories is silently ignored.
        break

    # pd.concat raises on an empty list; fall back to an empty frame so a
    # (blank) CSV is still produced for years with no matching files.
    if station_frames:
        global_data = pd.concat(station_frames, ignore_index=True)
    else:
        global_data = pd.DataFrame()

    global_data.to_csv(os.path.join(OUTPUT_DIR, 'global_weather_data_' + str(year_num) + '.csv'))
    # Release the year's data before loading the next one.
    del global_data, station_frames