In [None]:
import matplotlib.pyplot as plt

In [None]:
def hd_dict(fileobj=None):
    '''Read the station table in the file header and return a dictionary mapping station name onto station number.

    The station table is made of the lines that follow the line containing NAME
    (#STN LON(east) LAT(North) Alt(m) NAME) and it ends at the first blank comment line.
    Reading stops at the first line containing YYYYMMDD, where the variable explanations start.
    An example dictionary element is: 'EELDE': 280
    The dictionary created is used later in create_plots to get the station number corresponding
    to the station name for which we are creating plots.

    Parameters:
    fileobj: optional iterable of text lines (an open file or a list of strings). When it is
             omitted the module-level file object f is read, so existing calls hd_dict() keep working.

    Returns:
    dict mapping station name (str, without trailing whitespace) onto station number (int).
    Rows that do not contain both a station number and a name are skipped.'''
    source = f if fileobj is None else fileobj

    header_lines = []
    name_row = 0  # index of the last line containing NAME, i.e. the station table heading
    for raw in source:
        text = raw.rstrip()  # drops the newline and any trailing blanks that would end up in the name
        header_lines.append(text)
        if 'NAME' in text:
            name_row = len(header_lines) - 1
        if 'YYYYMMDD' in text:  # all the stations are done and the variable explanations start
            break

    stationdict = {}
    for text in header_lines[name_row + 1:]:
        if not text.lstrip('#').strip():  # blank line after the last station ends the table
            break
        words = text.split(' ')
        number = None
        for pos, word in enumerate(words):
            if not word:
                continue
            prefix = word.split(':', 1)[0].lstrip('#')
            if ':' in word and prefix.isdigit():
                # Station numbers are followed by ':' (for example '260:'); everything before
                # the colon is the number, whatever its length.
                if number is None:
                    number = int(prefix)
            elif word[0].isalpha():
                # First word of the station name; names may have several words, so the rest
                # of the line is kept. Number and name are stored together so they cannot
                # get out of step when a row is incomplete.
                if number is not None:
                    stationdict[' '.join(words[pos:])] = number
                break
    return stationdict

In [None]:
def varmap(fileobj=None):
    '''Read the last-but-one line of the header and return a dictionary mapping variable name to its position in the header.

    It reads the line # STN,YYYYMMDD,DDVEC.... ,EV24
    Example dictionary element: 'SP':19
    The dictionary created is used in create_plots to find the positions of the variables we are
    making weather plots of like minimum temperature, precipitation, etc.

    Parameters:
    fileobj: optional iterable of text lines (an open file or a list of strings). When it is
             omitted the module-level file object f is read, so existing calls varmap() keep working.

    Returns:
    dict mapping variable name (str) onto its 0-based position in the column line.

    Raises:
    IndexError if fewer than three lines were read, so the column line cannot be located.'''
    source = f if fileobj is None else fileobj

    seen = []
    for raw in source:
        text = raw.rstrip()
        seen.append(text)
        if '#' not in text:  # data lines have no #, so the header ended on the previous line
            break

    if len(seen) < 3:
        raise IndexError('header too short: the variable name line could not be located')
    # seen[-1] is the first line without '#', seen[-2] the last header line, so seen[-3] is the
    # last-but-one header line that lists the variable names.
    column_line = seen[-3]

    # Variable names are separated by commas and padded with blanks. Treating both as
    # separators and keeping the alphanumeric tokens drops the leading '#' and empty fields.
    names = [token for token in column_line.replace(',', ' ').split() if token.isalnum()]
    return {name: position for position, name in enumerate(names)}

In [None]:
def createdatalist(f):
    '''Read all the weather data in the file and return a nested list where each list has a row of data.

    Lines containing # are header lines and are skipped. Blank lines are skipped as well, so a
    stray empty line (for example at the end of the file) does not abort the read.
    Empty fields are replaced with None, the station number is converted to int, the date is kept
    as a string and other data are stored as float.
    Example: the line
        209,20010203,  106,   65,     ,     ,
    is converted to [209,'20010203',106.0,65.0,None,None,None]
    The nested list is then used in create_plots to create weather plots for the desired station.

    Parameters:
    f: iterable of text lines, normally the open file pointer.

    Returns:
    list of rows, each row being a list [station (int), date (str), value or None, ...].'''
    findata = []
    for line in f:
        if '#' in line:  # lines containing data do not have #, all other lines do
            continue
        text = line.strip()
        if not text:  # nothing to parse on a blank line
            continue
        row = []
        for field in text.split(','):  # data points are separated by ,
            field = field.strip()
            row.append(float(field) if field else None)
        row[0] = int(row[0])  # station number is the first column
        row[1] = str(int(row[1]))  # date goes float -> int -> str, giving 'YYYYMMDD' without a decimal point
        findata.append(row)
    return findata

In [None]:
def _monthly_means(fdata, stnum, column, trace_as_zero=False):
    '''Return 12 values (January..December): the monthly mean of one column for one station, averaged over all years.

    For every (year, month) of the station the non-empty daily values are averaged and divided
    by 10, because the data is stored in tenths of the plotted unit. The values for the same
    calendar month are then averaged across the years that have data. Months without data give 0.0.
    With trace_as_zero=True a stored value of -1 adds nothing to the sum but the day still
    counts towards the average.'''
    per_month = {}  # (year, month) -> [sum of daily values, number of days with data]
    for row in fdata:
        if row[0] != stnum:
            continue
        value = row[column]
        if value is None:
            continue
        if trace_as_zero and value == -1:
            value = 0.0
        key = (int(row[1][0:4]), int(row[1][4:6]))
        bucket = per_month.setdefault(key, [0.0, 0])
        bucket[0] += value
        bucket[1] += 1

    # Index 0 is unused so that the month number can be used directly as the index.
    totals = [0.0] * 13
    years = [0] * 13
    for (_, month), (total, days) in per_month.items():
        totals[month] += total / (days * 10)
        years[month] += 1
    return [totals[m] / years[m] if years[m] else 0.0 for m in range(1, 13)]


def create_plots(dict1, dict2, fdata,namest):
    '''Create a figure with 4 subplots containing the monthly averages of minimum and maximum temperature, daily sum of
    precipitation (in mm) and duration of sunshine per day in hours, for a specific station.

    Each (year, month) of the station is averaged exactly once over its days with data, and the
    result for a calendar month is the mean of those values over all years.
    Sunshine duration and precipitation stored as -1 (less than 0.05 hours or mm) are taken to
    be 0, but the day still counts towards the average.

    The input parameters are:
    1. dict1: Dictionary mapping station name to station number, obtained using the function hd_dict
    2. dict2: Dictionary mapping variable name to position in the header, obtained using the function varmap
    3. fdata: Nested list containing weather data, obtained using the function createdatalist
    4. namest: Name of the station for which we wish to create weather plots, given by the user.

    A station name missing from dict1 resolves to station number 0, and a variable missing from
    dict2 resolves to column 0. Nothing is returned; the figure is shown with plt.show().'''
    months = list(range(1, 13))
    stnum = dict1.get(namest, 0)  # station number corresponding to the station we are analyzing

    # (y-axis label, monthly averages) in subplot order
    panels = [
        ('Minimum temperature (Celsius)', _monthly_means(fdata, stnum, dict2.get('TN', 0))),
        ('Maximum temperature (Celsius)', _monthly_means(fdata, stnum, dict2.get('TX', 0))),
        ('Precipitation (mm)', _monthly_means(fdata, stnum, dict2.get('RH', 0), trace_as_zero=True)),
        ('Sunshine duration (hours/day)', _monthly_means(fdata, stnum, dict2.get('SQ', 0), trace_as_zero=True)),
    ]

    # creating plots
    simplot = plt.figure(figsize=(15.0, 12.0))
    for slot, (ylabel, values) in enumerate(panels, start=1):
        axes = simplot.add_subplot(2, 2, slot)
        axes.set_xlabel('Months')
        axes.set_ylabel(ylabel)
        axes.plot(months, values)
        axes.set_xticks(months)

    plt.tight_layout(pad=5.0)
    plt.suptitle('WEATHER PLOTS FOR ' +namest+' STATION')
    plt.show()

In [None]:
# Raw string so that the backslashes of the Windows path are never read as escape sequences.
datafile=r'C:\Sem1\Period1\Prog4AA\KNMI_20200825.txt'

# hd_dict() and varmap() read from the module-level name f, and each reader consumes the file
# from the top, so the file is reopened and bound to f for every pass. The with statement
# closes the file on exit, so no explicit close() is needed.
with open(datafile,'r') as f:
    stdict=hd_dict()  #dictionary that maps station name to number
    print('Dictionary mapping station name to station number:\n',stdict)
with open(datafile,'r') as f:
    varnamedict=varmap()  #dictionary that maps variable name to position in header
    print('Dictionary mapping variable name to column number in header:\n',varnamedict)
with open(datafile,'r') as f:
    finaldata=createdatalist(f)  #nested list containing data of stations

# Plotting works on the data already in memory, so the file does not have to be open here.
print('\n')
create_plots(stdict,varnamedict,finaldata,'DE BILT')
create_plots(stdict,varnamedict,finaldata,'SCHIPHOL')