### Merging 2022 Weather Data and Initial Inspection and Cleaning

In [1]:
import os
import pandas as pd
file_list = os.listdir()
flat_file_list = list(filter(lambda x: x.endswith('csv'), file_list)) 
data_frames = []
for file_name in flat_file_list:
    df = pd.read_csv(file_name)
    data_frames.append(df)
merged_frame = pd.concat(data_frames)

In [2]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,TEMP_ATTRIBUTES,DEWP,DEWP_ATTRIBUTES,...,MXSPD,GUST,MAX,MAX_ATTRIBUTES,MIN,MIN_ATTRIBUTES,PRCP,PRCP_ATTRIBUTES,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,19,3.8,19,...,27.0,35.5,18.0,,9.0,,0.01,E,999.9,1000
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,18,20.0,18,...,44.5,60.0,33.3,,16.2,,0.11,E,999.9,1000
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,18,21.7,18,...,41.0,56.9,33.8,,26.8,,0.0,I,999.9,0
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,15,14.4,15,...,29.7,39.0,26.4,,21.6,,0.0,E,999.9,1000
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,19,16.5,19,...,12.8,15.9,24.8,,20.1,,0.04,E,999.9,1000


There appear to be zero and NaN latitude/longitude data.  For now lets verify that we have nonempty entries in these fields.  I can always lookup by station number later if I need to find data near a point.

In [3]:
merged_frame[(merged_frame['LATITUDE'] != 0)].dropna().head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,TEMP_ATTRIBUTES,DEWP,DEWP_ATTRIBUTES,...,MXSPD,GUST,MAX,MAX_ATTRIBUTES,MIN,MIN_ATTRIBUTES,PRCP,PRCP_ATTRIBUTES,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,19,3.8,19,...,27.0,35.5,18.0,,9.0,,0.01,E,999.9,1000
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,18,20.0,18,...,44.5,60.0,33.3,,16.2,,0.11,E,999.9,1000
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,18,21.7,18,...,41.0,56.9,33.8,,26.8,,0.0,I,999.9,0
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,15,14.4,15,...,29.7,39.0,26.4,,21.6,,0.0,E,999.9,1000
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,19,16.5,19,...,12.8,15.9,24.8,,20.1,,0.04,E,999.9,1000


There are entries with valid latitude and longitude.  Table will be tidied up and then exported to a merged CSV for DB import.

Temp attributes give the number of measurements averaged for mean temperature.  This will not assist me in modeling, so I am going to drop this column.

In [4]:
merged_frame.drop(merged_frame.columns[7], axis = 1, inplace = True)

In [5]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,DEWP_ATTRIBUTES,SLP,...,MXSPD,GUST,MAX,MAX_ATTRIBUTES,MIN,MIN_ATTRIBUTES,PRCP,PRCP_ATTRIBUTES,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,19,1010.2,...,27.0,35.5,18.0,,9.0,,0.01,E,999.9,1000
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,18,990.3,...,44.5,60.0,33.3,,16.2,,0.11,E,999.9,1000
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,18,1002.1,...,41.0,56.9,33.8,,26.8,,0.0,I,999.9,0
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,15,1012.2,...,29.7,39.0,26.4,,21.6,,0.0,E,999.9,1000
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,19,1007.6,...,12.8,15.9,24.8,,20.1,,0.04,E,999.9,1000


The same situation applies with the other attribute fields.  Removing them as well.

In [6]:
merged_frame.drop(['DEWP_ATTRIBUTES', 'MAX_ATTRIBUTES', 'MIN_ATTRIBUTES', 'PRCP_ATTRIBUTES'], axis = 1, inplace=True)

In [7]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,SLP,SLP_ATTRIBUTES,...,VISIB_ATTRIBUTES,WDSP,WDSP_ATTRIBUTES,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,1010.2,19,...,4,16.6,19,27.0,35.5,18.0,9.0,0.01,999.9,1000
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,990.3,18,...,5,23.0,17,44.5,60.0,33.3,16.2,0.11,999.9,1000
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,1002.1,18,...,0,33.6,18,41.0,56.9,33.8,26.8,0.0,999.9,0
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,1012.2,15,...,0,19.8,15,29.7,39.0,26.4,21.6,0.0,999.9,1000
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,1007.6,19,...,0,6.6,13,12.8,15.9,24.8,20.1,0.04,999.9,1000


For Temp, dewpoint, SLP, and STP missing values are reported as 9999.9.  These are going to be replaced with NaN for clarity and DB processing purposes.

In [8]:
import numpy as np

def set_null_9999(arg):
    if arg == 9999.9:
        return np.NaN
    else:
        return arg
    
merged_frame['TEMP'] = merged_frame['TEMP'].apply(set_null_9999) 

In [9]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,SLP,SLP_ATTRIBUTES,...,VISIB_ATTRIBUTES,WDSP,WDSP_ATTRIBUTES,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,1010.2,19,...,4,16.6,19,27.0,35.5,18.0,9.0,0.01,999.9,1000
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,990.3,18,...,5,23.0,17,44.5,60.0,33.3,16.2,0.11,999.9,1000
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,1002.1,18,...,0,33.6,18,41.0,56.9,33.8,26.8,0.0,999.9,0
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,1012.2,15,...,0,19.8,15,29.7,39.0,26.4,21.6,0.0,999.9,1000
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,1007.6,19,...,0,6.6,13,12.8,15.9,24.8,20.1,0.04,999.9,1000


In [10]:
merged_frame['DEWP'] = merged_frame['DEWP'].apply(set_null_9999)
merged_frame['SLP'] = merged_frame['SLP'].apply(set_null_9999) 
merged_frame['STP'] = merged_frame['STP'].apply(set_null_9999)

In [11]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,SLP,SLP_ATTRIBUTES,...,VISIB_ATTRIBUTES,WDSP,WDSP_ATTRIBUTES,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,1010.2,19,...,4,16.6,19,27.0,35.5,18.0,9.0,0.01,999.9,1000
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,990.3,18,...,5,23.0,17,44.5,60.0,33.3,16.2,0.11,999.9,1000
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,1002.1,18,...,0,33.6,18,41.0,56.9,33.8,26.8,0.0,999.9,0
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,1012.2,15,...,0,19.8,15,29.7,39.0,26.4,21.6,0.0,999.9,1000
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,1007.6,19,...,0,6.6,13,12.8,15.9,24.8,20.1,0.04,999.9,1000


For VISIB, WDSP, MXSPD, and GUST the missing value is encoded as 999.9.  As before replacing with NaN.

In [12]:
def set_null_999(arg):
    if arg == 999.9:
        return np.NaN
    else:
        return arg
    
merged_frame['VISIB'] = merged_frame['VISIB'].apply(set_null_999) 

In [13]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,SLP,SLP_ATTRIBUTES,...,VISIB_ATTRIBUTES,WDSP,WDSP_ATTRIBUTES,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,1010.2,19,...,4,16.6,19,27.0,35.5,18.0,9.0,0.01,999.9,1000
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,990.3,18,...,5,23.0,17,44.5,60.0,33.3,16.2,0.11,999.9,1000
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,1002.1,18,...,0,33.6,18,41.0,56.9,33.8,26.8,0.0,999.9,0
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,1012.2,15,...,0,19.8,15,29.7,39.0,26.4,21.6,0.0,999.9,1000
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,1007.6,19,...,0,6.6,13,12.8,15.9,24.8,20.1,0.04,999.9,1000


In [14]:
merged_frame['WDSP'] = merged_frame['WDSP'].apply(set_null_999)
merged_frame['MXSPD'] = merged_frame['MXSPD'].apply(set_null_999)
merged_frame['GUST'] = merged_frame['GUST'].apply(set_null_999)

In [15]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,SLP,SLP_ATTRIBUTES,...,VISIB_ATTRIBUTES,WDSP,WDSP_ATTRIBUTES,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,1010.2,19,...,4,16.6,19,27.0,35.5,18.0,9.0,0.01,999.9,1000
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,990.3,18,...,5,23.0,17,44.5,60.0,33.3,16.2,0.11,999.9,1000
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,1002.1,18,...,0,33.6,18,41.0,56.9,33.8,26.8,0.0,999.9,0
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,1012.2,15,...,0,19.8,15,29.7,39.0,26.4,21.6,0.0,999.9,1000
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,1007.6,19,...,0,6.6,13,12.8,15.9,24.8,20.1,0.04,999.9,1000


In [16]:
merged_frame.drop(['SLP_ATTRIBUTES', 'VISIB_ATTRIBUTES', 'WDSP_ATTRIBUTES'], axis=1, inplace=True)

In [17]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,SLP,STP,STP_ATTRIBUTES,VISIB,WDSP,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,1010.2,9.0,19,9.9,16.6,27.0,35.5,18.0,9.0,0.01,999.9,1000
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,990.3,989.1,18,1.9,23.0,44.5,60.0,33.3,16.2,0.11,999.9,1000
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,1002.1,0.9,18,,33.6,41.0,56.9,33.8,26.8,0.0,999.9,0
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,1012.2,11.0,15,,19.8,29.7,39.0,26.4,21.6,0.0,999.9,1000
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,1007.6,6.4,19,,6.6,12.8,15.9,24.8,20.1,0.04,999.9,1000


For snow depth missing data will be reported as 999.9.  However, most stations do not report 0 on days with no snow on ground.  As such I will clean this value to zero on such days.

In [18]:
def set_zero_999(arg):
    if arg == 999.9:
        return 0
    else:
        return arg
    
merged_frame['SNDP'] = merged_frame['SNDP'].apply(set_zero_999)

In [19]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,SLP,STP,STP_ATTRIBUTES,VISIB,WDSP,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,1010.2,9.0,19,9.9,16.6,27.0,35.5,18.0,9.0,0.01,0.0,1000
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,990.3,989.1,18,1.9,23.0,44.5,60.0,33.3,16.2,0.11,0.0,1000
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,1002.1,0.9,18,,33.6,41.0,56.9,33.8,26.8,0.0,0.0,0
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,1012.2,11.0,15,,19.8,29.7,39.0,26.4,21.6,0.0,0.0,1000
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,1007.6,6.4,19,,6.6,12.8,15.9,24.8,20.1,0.04,0.0,1000


FRSHTT is a combined indicator for occurences of meteorological conditions.  The interpretation for this field is coppied from the data source readme (https://www.ncei.noaa.gov/data/global-summary-of-the-day/doc/readme.txt).


FRSHTT - Indicators (1 = yes, 0 = no/not reported) for the occurrence during the day of:
                         Fog ('F' - 1st digit).
                         Rain or Drizzle ('R' - 2nd digit).
                         Snow or Ice Pellets ('S' - 3rd digit).
                         Hail ('H' - 4th digit).
                         Thunder ('T' - 5th digit).
                         Tornado or Funnel Cloud ('T' - 6th digit).

In [20]:
def process_FRSHTT(arg):
    string = str(arg)
    if len(string)>6:
        #more than six chars would be invalid
        return np.NaN
    index = 0
    outputs = ['Fog', 'Rain', 'Snow', 'Hail', 'Thunder', 'Tornado']
    output = []
    for char in string:
        if char == '1':
            output.append(outputs[index])
        index+=1
    return "|".join(output)

In [21]:
merged_frame['FRSHTT'] = merged_frame['FRSHTT'].apply(process_FRSHTT)

In [22]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,SLP,STP,STP_ATTRIBUTES,VISIB,WDSP,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,1010.2,9.0,19,9.9,16.6,27.0,35.5,18.0,9.0,0.01,0.0,Fog
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,990.3,989.1,18,1.9,23.0,44.5,60.0,33.3,16.2,0.11,0.0,Fog
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,1002.1,0.9,18,,33.6,41.0,56.9,33.8,26.8,0.0,0.0,
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,1012.2,11.0,15,,19.8,29.7,39.0,26.4,21.6,0.0,0.0,Fog
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,1007.6,6.4,19,,6.6,12.8,15.9,24.8,20.1,0.04,0.0,Fog


In [23]:
merged_frame.drop('STP_ATTRIBUTES', axis=1, inplace=True)

99.99 denotes a missing PRCP value.  Many stations do not report precip if the value is 0.  So I will replace those values with 0.

In [24]:
def set_zero_99(arg):
    if arg == 99.99:
        return 0
    else:
        return arg
    
merged_frame['PRCP'] = merged_frame['PRCP'].apply(set_zero_99)

In [25]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,SLP,STP,VISIB,WDSP,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,1010.2,9.0,9.9,16.6,27.0,35.5,18.0,9.0,0.01,0.0,Fog
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,990.3,989.1,1.9,23.0,44.5,60.0,33.3,16.2,0.11,0.0,Fog
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,1002.1,0.9,,33.6,41.0,56.9,33.8,26.8,0.0,0.0,
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,1012.2,11.0,,19.8,29.7,39.0,26.4,21.6,0.0,0.0,Fog
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,1007.6,6.4,,6.6,12.8,15.9,24.8,20.1,0.04,0.0,Fog


MAX and MIN also report missing data as 9999.9.  Cleaning as such with NaN.

In [26]:
merged_frame['MAX'] = merged_frame['MAX'].apply(set_null_9999) 
merged_frame['MIN'] = merged_frame['MIN'].apply(set_null_9999)

In [27]:
merged_frame.head()

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,DEWP,SLP,STP,VISIB,WDSP,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
0,1001099999,2022-01-01,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",12.1,3.8,1010.2,9.0,9.9,16.6,27.0,35.5,18.0,9.0,0.01,0.0,Fog
1,1001099999,2022-01-02,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",24.7,20.0,990.3,989.1,1.9,23.0,44.5,60.0,33.3,16.2,0.11,0.0,Fog
2,1001099999,2022-01-03,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",30.9,21.7,1002.1,0.9,,33.6,41.0,56.9,33.8,26.8,0.0,0.0,
3,1001099999,2022-01-04,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.6,14.4,1012.2,11.0,,19.8,29.7,39.0,26.4,21.6,0.0,0.0,Fog
4,1001099999,2022-01-05,70.933333,-8.666667,9.0,"JAN MAYEN NOR NAVY, NO",23.1,16.5,1007.6,6.4,,6.6,12.8,15.9,24.8,20.1,0.04,0.0,Fog


In [28]:
merged_frame.to_csv('2022-merged-cleaned.csv.dat')#appending .dat to avoid accidental reprocessing if script is rerun.
