# Step 1: Convert the NetCDF data to CSV and extract the monthly average temperature

In [None]:
from netCDF4 import Dataset
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pathlib import Path

In [None]:
# Open the netCDF file and keep the dataset handle in `df`.
nc_path = "/content/tas_CA_daily_ssp126_CanESM5.nc"
df = Dataset(nc_path)

In [None]:
# List the names of the variables stored in the netCDF file.
print(df.variables.keys())

dict_keys(['lon', 'lat', 'time', 'tas'])


In [None]:
# Print the dataset summary (data model, dimensions, variables, groups) followed by the type of the handle.
print(df,type(df))


<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): lon(13), lat(12), time(2190)
    variables(dimensions): float64 lon(lon), float64 lat(lat), int64 time(time), float32 tas(time, lat, lon)
    groups:  <class 'netCDF4._netCDF4.Dataset'>


In [None]:
# Print the full metadata of every variable: dtype, dimensions, attributes and current shape.
print(df.variables)

{'lon': <class 'netCDF4._netCDF4.Variable'>
float64 lon(lon)
    _FillValue: nan
    units: degrees_west
    long_name: lon
unlimited dimensions: 
current shape = (13,)
filling on, 'lat': <class 'netCDF4._netCDF4.Variable'>
float64 lat(lat)
    _FillValue: nan
    units: degrees_north
    long_name: lat
unlimited dimensions: 
current shape = (12,)
filling on, 'time': <class 'netCDF4._netCDF4.Variable'>
int64 time(time)
    long_name: time
    units: days since 2015-01-01 12:00:00.000000
    calendar: noleap
unlimited dimensions: 
current shape = (2190,)
filling on, default _FillValue of -9223372036854775806 used, 'tas': <class 'netCDF4._netCDF4.Variable'>
float32 tas(time, lat, lon)
    _FillValue: 1e+20
    units: Degrees Celsius
    long_name: Daily Near-Surface Air Temperature
unlimited dimensions: 
current shape = (2190, 12, 13)
filling on}


In [None]:
# For every variable, show the names of the attributes attached to it (as reported by ncattrs()).
print(df.variables.keys())
for var_name, variable in df.variables.items():
    print(f'{var_name}: {variable.ncattrs()}')


dict_keys(['lon', 'lat', 'time', 'tas'])
lon: ['_FillValue', 'units', 'long_name']
lat: ['_FillValue', 'units', 'long_name']
time: ['long_name', 'units', 'calendar']
tas: ['_FillValue', 'units', 'long_name']


In [None]:
# Report the length of each dimension in the file.
for dim_name, dimension in df.dimensions.items():
    print(f'{dim_name}_sizes: {dimension.size}')

lon_sizes: 13
lat_sizes: 12
time_sizes: 2190


In [None]:
import csv

In [None]:
# Read the raw time axis from the file (numeric offsets; their meaning is given by the variable's `units` and `calendar` attributes).
time=df.variables['time'][:]

In [None]:
# Show the time values currently held in `time`.
print(time)

[   0    1    2 ... 2187 2188 2189]


In [None]:
# Build the calendar dates for the daily time steps of the file.
# The time axis uses the "noleap" calendar (every year has 365 days), so the
# 2190 steps are 6 years x 365 days: they end on 2020-12-31 and never contain
# 29 February. A plain Gregorian range ending on 2020-12-29 has the same length
# but labels every step after 2016-02-28 one day early (two days early after
# 2020-02-28), which moves days across month boundaries in the monthly means.
all_days = pd.date_range('2015-01-01', '2020-12-31', freq='1D')
time = all_days[~((all_days.month == 2) & (all_days.day == 29))]
time

DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04',
               '2015-01-05', '2015-01-06', '2015-01-07', '2015-01-08',
               '2015-01-09', '2015-01-10',
               ...
               '2020-12-20', '2020-12-21', '2020-12-22', '2020-12-23',
               '2020-12-24', '2020-12-25', '2020-12-26', '2020-12-27',
               '2020-12-28', '2020-12-29'],
              dtype='datetime64[ns]', length=2190, freq='D')

In [None]:
import datetime
from dateutil.parser import parse

In [None]:
# Read all values of the 'lon' variable.
lon=df.variables['lon'][:]

In [None]:
# Show the longitude values.
print(lon)

[-120.125 -119.875 -119.625 -119.375 -119.125 -118.875 -118.625 -118.375
 -118.125 -117.875 -117.625 -117.375 -117.125]


In [None]:
# Flatten the temperature cube into a long-format CSV: one row per
# (time, lat, lon) cell that holds a value.
tas = df.variables['tas'][:]  # temperature values, indexed as [time, lat, lon] below
lon = df.variables['lon'][:]  # longitude
lat = df.variables['lat'][:]  # latitude
# Boolean array, True where a value is masked (missing). getmaskarray always
# returns a full-shaped array, even when nothing is masked.
missing = np.ma.getmaskarray(tas)
# newline='' is what the csv module expects; without it blank rows appear on Windows.
with open('tas_CA_daily_ssp126_CanESM5.csv', mode='w', newline='') as ice_file:
    ice_writer = csv.writer(ice_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    ice_writer.writerow(['time', 'lat', 'lon', 'tas'])  # column order of every data row
    for i in range(len(time)):
        for j in range(len(lat)):
            for k in range(len(lon)):
                # Test the mask directly instead of comparing the printed value
                # against '--' (the old `not in '--'` was a substring test).
                if not missing[i, j, k]:
                    ice_writer.writerow([time[i], lat[j], lon[k], tas[i, j, k]])

In [None]:
# Load the CSV written from the netCDF data back into a DataFrame.
data1 = pd.read_csv('tas_CA_daily_ssp126_CanESM5.csv')

In [None]:
# Summarise the columns, non-null counts and dtypes of the loaded table.
data1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 341640 entries, 0 to 341639
Data columns (total 4 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   time    341640 non-null  object 
 1   lat     341640 non-null  float64
 2   lon     341640 non-null  float64
 3   tas     341640 non-null  float64
dtypes: float64(3), object(1)
memory usage: 10.4+ MB


In [None]:
# Turn the 'time' strings read from the CSV into datetimes, then preview them as YYYY-MM-DD text
# (the formatted preview is only displayed, not stored).
data1 = data1.assign(time=pd.to_datetime(data1['time']))
data1['time'].dt.strftime('%Y-%m-%d')

0         2015-01-01
1         2015-01-01
2         2015-01-01
3         2015-01-01
4         2015-01-01
             ...    
341635    2020-12-29
341636    2020-12-29
341637    2020-12-29
341638    2020-12-29
341639    2020-12-29
Name: time, Length: 341640, dtype: object

In [None]:
# Preview the first rows of the table.
data1.head()

Unnamed: 0,time,lat,lon,tas
0,2015-01-01,39.125,-120.125,4.46701
1,2015-01-01,39.125,-119.875,4.822693
2,2015-01-01,39.125,-119.625,5.795868
3,2015-01-01,39.125,-119.375,4.988861
4,2015-01-01,39.125,-119.125,7.081543


In [None]:
# Index the table by time and average every column within each calendar month.
# The guard keeps the cell re-runnable: once 'time' has become the index it is
# no longer a column, and calling set_index('time') again would raise KeyError.
if 'time' in data1.columns:
    data1 = data1.set_index('time')
# 'M' groups rows into month-end bins; the mean runs over all rows of the month.
monthly = data1.resample('M').mean()

In [None]:
# Display the monthly means.
monthly

Unnamed: 0_level_0,lat,lon,tas
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-01-31,37.75,-118.625,7.404219
2015-02-28,37.75,-118.625,8.007508
2015-03-31,37.75,-118.625,7.037775
2015-04-30,37.75,-118.625,7.444456
2015-05-31,37.75,-118.625,13.386442
...,...,...,...
2020-08-31,37.75,-118.625,20.466889
2020-09-30,37.75,-118.625,16.240432
2020-10-31,37.75,-118.625,14.461048
2020-11-30,37.75,-118.625,8.163345


# Step 2: Extract the monthly average temperature from the CA MODIS data

In [None]:
# Import the MODIS temperature table, parsing the 'date' column and using it as the index.
CA_modis = pd.read_csv(
    '/content/CA_2015_2020_MODIS.csv',
    index_col='date',
    parse_dates=['date'],
)
CA_modis.head()

Unnamed: 0_level_0,system:index,temp1,temp10,temp11,temp12,temp2,temp3,temp4,temp5,temp6,temp7,temp8,temp9,.geo
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2015-01-01,0,5.890000000000043,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952,-5.409999999999968,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952,"{""type"":""MultiPoint"",""coordinates"":[]}"
2015-01-02,1,4.8700000000000045,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,1.57000000000005,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,"{""type"":""MultiPoint"",""coordinates"":[]}"
2015-01-03,2,5.830000000000041,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,4.250000000000057,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,"{""type"":""MultiPoint"",""coordinates"":[]}"
2015-01-04,3,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,"{""type"":""MultiPoint"",""coordinates"":[]}"
2015-01-05,4,5.57000000000005,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975,no data,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975,"{""type"":""MultiPoint"",""coordinates"":[]}"


In [None]:
# Missing observations are stored as the text 'no data'; turn them into real
# NaN values so pandas treats them as missing.
missing_values = ['no data']
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
CA_modis = CA_modis.replace(missing_values, np.nan)
# Drop the non-temperature columns. errors='ignore' keeps the cell re-runnable
# after the columns have already been removed.
CA_modis = CA_modis.drop(columns=['system:index', '.geo'], errors='ignore')
CA_modis

Unnamed: 0_level_0,temp1,temp10,temp11,temp12,temp2,temp3,temp4,temp5,temp6,temp7,temp8,temp9
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-01-01,5.890000000000043,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952,-5.409999999999968,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952
2015-01-02,4.8700000000000045,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,1.57000000000005,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704
2015-01-03,5.830000000000041,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,4.250000000000057,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975
2015-01-04,,,,,,,,,,,,
2015-01-05,5.57000000000005,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975,,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975
...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-26,3.3700000000000045,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959
2020-12-27,,,,,,,,,,,,
2020-12-28,,,,,,,,,,,,
2020-12-29,4.2900000000000205,-10.109999999999957,-10.109999999999957,-10.109999999999957,-10.109999999999957,-4.269999999999982,-10.109999999999957,-10.109999999999957,-10.109999999999957,-10.109999999999957,-10.109999999999957,-10.109999999999957


In [None]:
# Forward-fill: each missing value takes the most recent earlier value in the same column.
CA_modis = CA_modis.ffill()
CA_modis

Unnamed: 0_level_0,temp1,temp10,temp11,temp12,temp2,temp3,temp4,temp5,temp6,temp7,temp8,temp9
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-01-01,5.890000000000043,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952,-5.409999999999968,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952,-7.489999999999952
2015-01-02,4.8700000000000045,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,1.57000000000005,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704,-3.4699999999999704
2015-01-03,5.830000000000041,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,4.250000000000057,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975
2015-01-04,5.830000000000041,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,4.250000000000057,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975,-2.589999999999975
2015-01-05,5.57000000000005,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975,4.250000000000057,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975,-1.089999999999975
...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-26,3.3700000000000045,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,2.590000000000032,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959
2020-12-27,3.3700000000000045,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,2.590000000000032,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959
2020-12-28,3.3700000000000045,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,2.590000000000032,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959,-7.169999999999959
2020-12-29,4.2900000000000205,-10.109999999999957,-10.109999999999957,-10.109999999999957,-10.109999999999957,-4.269999999999982,-10.109999999999957,-10.109999999999957,-10.109999999999957,-10.109999999999957,-10.109999999999957,-10.109999999999957


In [None]:
# Count the missing values left in each column.
CA_modis.isnull().sum()

temp1     0
temp10    0
temp11    0
temp12    0
temp2     0
temp3     0
temp4     0
temp5     0
temp6     0
temp7     0
temp8     0
temp9     0
dtype: int64

In [None]:
# Cast every column to float, then add 'average' as the row-wise mean across the columns.
CA_modis = CA_modis.astype(float).assign(average=lambda frame: frame.mean(axis=1))
CA_modis

Unnamed: 0_level_0,temp1,temp10,temp11,temp12,temp2,temp3,temp4,temp5,temp6,temp7,temp8,temp9,average
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2015-01-01,5.89,-7.49,-7.49,-7.49,-7.49,-5.41,-7.49,-7.49,-7.49,-7.49,-7.49,-7.49,-6.201667
2015-01-02,4.87,-3.47,-3.47,-3.47,-3.47,1.57,-3.47,-3.47,-3.47,-3.47,-3.47,-3.47,-2.355000
2015-01-03,5.83,-2.59,-2.59,-2.59,-2.59,4.25,-2.59,-2.59,-2.59,-2.59,-2.59,-2.59,-1.318333
2015-01-04,5.83,-2.59,-2.59,-2.59,-2.59,4.25,-2.59,-2.59,-2.59,-2.59,-2.59,-2.59,-1.318333
2015-01-05,5.57,-1.09,-1.09,-1.09,-1.09,4.25,-1.09,-1.09,-1.09,-1.09,-1.09,-1.09,-0.090000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-26,3.37,-7.17,-7.17,-7.17,-7.17,2.59,-7.17,-7.17,-7.17,-7.17,-7.17,-7.17,-5.478333
2020-12-27,3.37,-7.17,-7.17,-7.17,-7.17,2.59,-7.17,-7.17,-7.17,-7.17,-7.17,-7.17,-5.478333
2020-12-28,3.37,-7.17,-7.17,-7.17,-7.17,2.59,-7.17,-7.17,-7.17,-7.17,-7.17,-7.17,-5.478333
2020-12-29,4.29,-10.11,-10.11,-10.11,-10.11,-4.27,-10.11,-10.11,-10.11,-10.11,-10.11,-10.11,-8.423333


In [None]:
# Group the daily 'average' series into month-end bins and take the mean of each month.
daily_average = CA_modis['average']
CA_Monthly_data_month = daily_average.resample('M').mean()
CA_Monthly_data_month

date
2015-01-31     0.560108
2015-02-28     2.591071
2015-03-31     6.239839
2015-04-30     8.959333
2015-05-31    12.105430
                ...    
2020-08-31    28.298871
2020-09-30    23.698778
2020-10-31    18.145376
2020-11-30     6.969944
2020-12-31    -1.768444
Freq: M, Name: average, Length: 72, dtype: float64

In [None]:
# Wrap the monthly MODIS series in a DataFrame.
# NOTE(review): this rebinds `df`, which earlier held the netCDF Dataset handle;
# cells that use df.variables / df.dimensions will fail if re-run after this one.
df=CA_Monthly_data_month.to_frame()

In [None]:
# Display the monthly MODIS average column.
df["average"]

date
2015-01-31     0.560108
2015-02-28     2.591071
2015-03-31     6.239839
2015-04-30     8.959333
2015-05-31    12.105430
                ...    
2020-08-31    28.298871
2020-09-30    23.698778
2020-10-31    18.145376
2020-11-30     6.969944
2020-12-31    -1.768444
Freq: M, Name: average, Length: 72, dtype: float64

In [None]:
# Display the monthly mean of 'tas' computed from the netCDF-derived table.
monthly['tas']

time
2015-01-31     7.404219
2015-02-28     8.007508
2015-03-31     7.037775
2015-04-30     7.444456
2015-05-31    13.386442
                ...    
2020-08-31    20.466889
2020-09-30    16.240432
2020-10-31    14.461048
2020-11-30     8.163345
2020-12-31     5.626634
Freq: M, Name: tas, Length: 72, dtype: float64

# Step 3: Calculate the RMSE

In [None]:
from math import sqrt
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statistics import mean

In [None]:
# Root-mean-square error between the monthly MODIS average and the monthly 'tas' means.
# The subtraction aligns the two series on their date index.
squared_diff = (df["average"] - monthly['tas']) ** 2
RMSE = squared_diff.mean() ** .5
print(RMSE)

5.920187201089324


In [None]:
# Cross-check the RMSE using scikit-learn's mean squared error.
# `math` is imported here because the notebook otherwise only does
# `from math import sqrt`, so `math.sqrt` raised NameError on a fresh kernel.
import math

# Import the submodule explicitly: a bare `import sklearn` is not guaranteed to
# make `sklearn.metrics` available.
import sklearn.metrics

actual = df["average"]
predicted = monthly['tas']

mse = sklearn.metrics.mean_squared_error(actual, predicted)

rmse = math.sqrt(mse)

print(rmse)

5.920187201089324
