In [36]:
import pandas as pd
import numpy as np
import altair as alt

In [37]:
# Load the raw waterway readings and preview the first four rows.
df=pd.read_csv('Boonsong Lekagul waterways readings (2).csv')
df.head(4)

Unnamed: 0,id,value,location,sample date,measure
0,2221,2.0,Boonsri,11-Jan-98,Water temperature
1,2223,9.1,Boonsri,11-Jan-98,Dissolved oxygen
2,2227,0.33,Boonsri,11-Jan-98,Ammonium
3,2228,0.01,Boonsri,11-Jan-98,Nitrites


In [38]:
df.shape  # (rows, columns): the dataset has 136824 rows and 5 columns

(136824, 5)

In [39]:
df.size  # total number of cells, i.e. rows * columns

684120

For Checking NULL values in the dataset

In [40]:
df.info() # per-column non-null counts and dtypes; used here to check for missing values

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136824 entries, 0 to 136823
Data columns (total 5 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   id           136824 non-null  int64  
 1   value        136824 non-null  float64
 2   location     136824 non-null  object 
 3   sample date  136824 non-null  object 
 4   measure      136824 non-null  object 
dtypes: float64(1), int64(1), object(3)
memory usage: 5.2+ MB


Hence we can see that there are no NULL values in the dataset

In [41]:
df.columns  # column labels of the dataset

Index(['id', 'value', 'location', 'sample date', 'measure'], dtype='object')

In [42]:
df.dtypes  # dtype of every column; 'sample date' is still plain text (object) here

id               int64
value          float64
location        object
sample date     object
measure         object
dtype: object

We can see that the column 'sample date' is of type "object", therefore we have to convert it into "datetime" format

In [43]:
# Parse the date strings (e.g. '11-Jan-98') into real datetime64 values.
# The earlier pd.Timestamp(...).strftime(...) round-trip turned every date back into text,
# so the column stayed dtype object and charts treated each date as a separate category.
df['sample date'] = pd.to_datetime(df['sample date'])

In [44]:
df.head() # preview the frame after the 'sample date' conversion

Unnamed: 0,id,value,location,sample date,measure
0,2221,2.0,Boonsri,1998-01-11,Water temperature
1,2223,9.1,Boonsri,1998-01-11,Dissolved oxygen
2,2227,0.33,Boonsri,1998-01-11,Ammonium
3,2228,0.01,Boonsri,1998-01-11,Nitrites
4,2229,1.47,Boonsri,1998-01-11,Nitrates


In [45]:
df['location'].unique()  # every distinct sampling location in the dataset

array(['Boonsri', 'Kannika', 'Chai', 'Kohsoom', 'Somchair', 'Sakda',
       'Busarakhan', 'Tansanee', 'Achara', 'Decha'], dtype=object)

In [46]:
# Derive the list of sites from the data instead of re-typing the unique() output by hand,
# so it cannot drift out of sync with the file. Order follows first appearance in the data.
sites = df['location'].unique().tolist()

In [47]:
df['measure'].unique()  # every distinct type of "measure" recorded in the dataset

array(['Water temperature', 'Dissolved oxygen', 'Ammonium', 'Nitrites',
       'Nitrates', 'Orthophosphate-phosphorus', 'Total phosphorus',
       'Sodium', 'Potassium', 'Calcium', 'Magnesium', 'Chlorides',
       'Sulphates', 'Iron', 'Manganese', 'Zinc', 'Copper', 'Chromium',
       'Lead', 'Cadmium', 'Mercury', 'Nickel', 'Arsenic',
       'Biochemical Oxygen', 'Chemical Oxygen Demand (Cr)',
       'Chemical Oxygen Demand (Mn)', 'AOX', 'Atrazine', 'Cesium',
       'Macrozoobenthos', 'Total coliforms', 'Fecal coliforms', 'p,p-DDT',
       'gamma-Hexachlorocyclohexane', 'Bicarbonates',
       'Anionic active surfactants', 'Total extractable matter',
       'Fecal streptococci ', 'Petroleum hydrocarbons', 'PAHs',
       'Benzo(a)pyrene', 'Benzo(g,h,i)perylene', 'Benzo(b)fluoranthene',
       'Benzo(k)fluoranthene', 'Fluoranthene', 'Indeno(1,2,3-c,d)pyrene',
       'PCB 28', 'PCB 52', 'PCB 101', 'PCB 138', 'PCB 153', 'PCB 180',
       'Silica (SiO2)', 'Oxygen saturation', 'Total hardness'

We will be dealing with the sites in the "locations" list and the measures in the "measures" list

In [48]:
# The three sampling locations compared in the rest of the notebook.
locations=['Boonsri','Kannika','Chai']

In [49]:
# The three measures examined at each of those locations.
measures=['Ammonium','Calcium','Nitrites']

## Plotting and Visualization of the locations on basis of their 'measure' values

1 For Boonsri and Ammonium

In [50]:
# Select the Ammonium rows recorded at Boonsri and preview them.
df_sudden_change_Boonsri_Ammonium = df.loc[
    (df['measure'] == 'Ammonium') & (df['location'] == 'Boonsri')
]
df_sudden_change_Boonsri_Ammonium.head()

Unnamed: 0,id,value,location,sample date,measure
2,2227,0.33,Boonsri,1998-01-11,Ammonium
226,2267,0.14,Boonsri,1998-02-15,Ammonium
450,2307,0.2,Boonsri,1998-03-12,Ammonium
674,2347,0.12,Boonsri,1998-04-11,Ammonium
708,2710,0.14,Boonsri,1998-04-11,Ammonium


In [51]:
# Keep only the reading and its date: drop the identifier and the now-constant location/measure.
data_sudden_change_Boonsri_Ammonium = df_sudden_change_Boonsri_Ammonium.drop(
    columns=['id', 'location', 'measure']
)
data_sudden_change_Boonsri_Ammonium.head()

Unnamed: 0,value,sample date
2,0.33,1998-01-11
226,0.14,1998-02-15
450,0.2,1998-03-12
674,0.12,1998-04-11
708,0.14,1998-04-11


In [52]:
# Scatter of Ammonium readings at Boonsri; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Boonsri_Ammonium).mark_point(size=100).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Ammonium'),
).properties(title='Ammonium at Boonsri')

In [53]:
# Line chart of Ammonium readings at Boonsri; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Boonsri_Ammonium).mark_line().encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Ammonium'),
).properties(title='Ammonium at Boonsri')

In the early years there is no clear trend in Ammonium at Boonsri, but after 2011 its concentration declined.
There are very few anomalies, for example: on 19-04-2007 the value of Ammonium is very high,
and on 31-10-2014 Ammonium rose abruptly.

2 For Kannika and Ammonium

In [54]:
# Select the Ammonium rows recorded at Kannika and preview them.
df_sudden_change_Kannika_Ammonium = df.loc[
    (df['measure'] == 'Ammonium') & (df['location'] == 'Kannika')
]
df_sudden_change_Kannika_Ammonium.head()

Unnamed: 0,id,value,location,sample date,measure
34,8297,0.3,Kannika,1998-01-26,Ammonium
66,8778,0.28,Kannika,1998-01-26,Ammonium
98,9261,0.27,Kannika,1998-01-26,Ammonium
354,8337,0.14,Kannika,1998-02-24,Ammonium
386,8818,0.13,Kannika,1998-02-24,Ammonium


In [55]:
# Keep only the reading and its date: drop the identifier and the now-constant location/measure.
data_sudden_change_Kannika_Ammonium = df_sudden_change_Kannika_Ammonium.drop(
    columns=['id', 'location', 'measure']
)
data_sudden_change_Kannika_Ammonium.head()

Unnamed: 0,value,sample date
34,0.3,1998-01-26
66,0.28,1998-01-26
98,0.27,1998-01-26
354,0.14,1998-02-24
386,0.13,1998-02-24


In [56]:
# Scatter of Ammonium readings at Kannika; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Kannika_Ammonium).mark_point(size=100).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Ammonium'),
).properties(title='Ammonium at Kannika')

In [57]:
# Line chart of Ammonium readings at Kannika; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Kannika_Ammonium).mark_line().encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Ammonium'),
).properties(title='Ammonium at Kannika')

We cannot find any pattern in the early years of Ammonium at Kannika, and there are many anomalies, such as the one on 2000-01-19; since 2009 its value has kept decreasing.

3 For Chai and Ammonium

In [58]:
# Select the Ammonium rows recorded at Chai and preview them.
df_sudden_change_Chai_Ammonium = df.loc[
    (df['measure'] == 'Ammonium') & (df['location'] == 'Chai')
]
df_sudden_change_Chai_Ammonium.head()

Unnamed: 0,id,value,location,sample date,measure
130,5054,0.4,Chai,1998-01-31,Ammonium
162,5538,0.4,Chai,1998-01-31,Ammonium
194,6016,0.41,Chai,1998-01-31,Ammonium
258,5093,0.77,Chai,1998-02-20,Ammonium
290,5577,0.6,Chai,1998-02-20,Ammonium


In [59]:
# Keep only the reading and its date: drop the identifier and the now-constant location/measure.
data_sudden_change_Chai_Ammonium = df_sudden_change_Chai_Ammonium.drop(
    columns=['id', 'location', 'measure']
)
data_sudden_change_Chai_Ammonium.head()

Unnamed: 0,value,sample date
130,0.4,1998-01-31
162,0.4,1998-01-31
194,0.41,1998-01-31
258,0.77,1998-02-20
290,0.6,1998-02-20


In [60]:
# Scatter of Ammonium readings at Chai; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Chai_Ammonium).mark_point(size=100).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Ammonium'),
).properties(title='Ammonium at Chai')

In [61]:
# Line chart of Ammonium readings at Chai; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Chai_Ammonium).mark_line().encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Ammonium'),
).properties(title='Ammonium at Chai')

In the early years there is no clear trend in Ammonium at Chai, but after 2011 its concentration declined.
There are very few anomalies, for example: on 19-04-2007 the value of Ammonium is very high, and on 31-10-2014 Ammonium rose abruptly.

Chart concatenation of Ammonium at sites 'Chai' and 'Boonsri'

In [62]:
# Stack Chai (tomato) above Boonsri (default colour); ':T' gives both panels a temporal axis
# and the titles say which site each panel shows.
chart1=alt.Chart(data_sudden_change_Chai_Ammonium).mark_point(color="tomato",size=100).encode(
    x="sample date:T", y="value"
).properties(title="Ammonium at Chai")
chart2=alt.Chart(data_sudden_change_Boonsri_Ammonium).mark_point(size=100).encode(
    x="sample date:T", y="value"
).properties(title="Ammonium at Boonsri")
chart1 & chart2

Chart concatenation of Ammonium at sites 'Kannika' and 'Boonsri'

In [63]:
# Stack Kannika (tomato) above Boonsri (default colour); ':T' gives both panels a temporal axis
# and the titles say which site each panel shows.
chart1=alt.Chart(data_sudden_change_Kannika_Ammonium).mark_point(color="tomato",size=100).encode(
    x="sample date:T", y="value"
).properties(title="Ammonium at Kannika")
chart2=alt.Chart(data_sudden_change_Boonsri_Ammonium).mark_point(size=100).encode(
    x="sample date:T", y="value"
).properties(title="Ammonium at Boonsri")
chart1 & chart2

Chart Concatenation of "Ammonium" at sites 'Chai' and 'Kannika'

In [64]:
# Stack Chai (tomato) above Kannika (default colour); ':T' gives both panels a temporal axis
# and the titles say which site each panel shows.
chart1=alt.Chart(data_sudden_change_Chai_Ammonium).mark_point(color="tomato",size=100).encode(
    x="sample date:T", y="value"
).properties(title="Ammonium at Chai")
chart2=alt.Chart(data_sudden_change_Kannika_Ammonium).mark_point(size=100).encode(
    x="sample date:T", y="value"
).properties(title="Ammonium at Kannika")
chart1 & chart2

1 For Boonsri Calcium

In [65]:
# Select the Calcium rows recorded at Boonsri and preview them.
df_sudden_change_Boonsri_Calcium = df.loc[
    (df['measure'] == 'Calcium') & (df['location'] == 'Boonsri')
]
df_sudden_change_Boonsri_Calcium.head()

Unnamed: 0,id,value,location,sample date,measure
9,2234,52.0,Boonsri,1998-01-11,Calcium
233,2274,56.0,Boonsri,1998-02-15,Calcium
457,2314,68.0,Boonsri,1998-03-12,Calcium
681,2354,51.0,Boonsri,1998-04-11,Calcium
715,2717,50.0,Boonsri,1998-04-11,Calcium


In [66]:
# Keep only the reading and its date: drop the identifier and the now-constant location/measure.
data_sudden_change_Boonsri_Calcium = df_sudden_change_Boonsri_Calcium.drop(
    columns=['id', 'location', 'measure']
)
data_sudden_change_Boonsri_Calcium.head()

Unnamed: 0,value,sample date
9,52.0,1998-01-11
233,56.0,1998-02-15
457,68.0,1998-03-12
681,51.0,1998-04-11
715,50.0,1998-04-11


In [67]:
# Line chart of Calcium readings at Boonsri; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Boonsri_Calcium).mark_line().encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Calcium'),
).properties(title='Calcium at Boonsri')

In [68]:
# Scatter of Calcium readings at Boonsri; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Boonsri_Calcium).mark_point(size=100).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Calcium'),
).properties(title='Calcium at Boonsri')

There is no trend or pattern in the values of Calcium at Boonsri and we can also see that value of calcium is varying a lot. There are very few anomalies in the plotting.

2 For Chai and Calcium

In [69]:
# Select the Calcium rows recorded at Chai and preview them.
df_sudden_change_Chai_Calcium = df.loc[
    (df['measure'] == 'Calcium') & (df['location'] == 'Chai')
]
df_sudden_change_Chai_Calcium.head()

Unnamed: 0,id,value,location,sample date,measure
137,5061,52.6,Chai,1998-01-31,Calcium
169,5545,51.8,Chai,1998-01-31,Calcium
201,6023,56.9,Chai,1998-01-31,Calcium
265,5100,62.87,Chai,1998-02-20,Calcium
297,5584,62.02,Chai,1998-02-20,Calcium


In [70]:
# Keep only the reading and its date: drop the identifier and the now-constant location/measure.
data_sudden_change_Chai_Calcium = df_sudden_change_Chai_Calcium.drop(
    columns=['id', 'location', 'measure']
)
data_sudden_change_Chai_Calcium.head()

Unnamed: 0,value,sample date
137,52.6,1998-01-31
169,51.8,1998-01-31
201,56.9,1998-01-31
265,62.87,1998-02-20
297,62.02,1998-02-20


In [71]:
# Line chart of Calcium readings at Chai; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Chai_Calcium).mark_line().encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Calcium'),
).properties(title='Calcium at Chai')

In [72]:
# Scatter of Calcium readings at Chai; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Chai_Calcium).mark_point(size=100).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Calcium'),
).properties(title='Calcium at Chai')

There is no trend or pattern in the values of Calcium at Chai, and we can also see that the value of Calcium varies a lot. There are a lot of anomalies in the plot, such as on 22-05-11, when the amount of Calcium dropped abruptly.

3 For Kannika and Calcium

In [73]:
# Select the Calcium rows recorded at Kannika and preview them.
df_sudden_change_Kannika_Calcium = df.loc[
    (df['measure'] == 'Calcium') & (df['location'] == 'Kannika')
]
df_sudden_change_Kannika_Calcium.head()

Unnamed: 0,id,value,location,sample date,measure
41,8304,67.19,Kannika,1998-01-26,Calcium
73,8785,66.4,Kannika,1998-01-26,Calcium
105,9268,67.2,Kannika,1998-01-26,Calcium
361,8344,52.3,Kannika,1998-02-24,Calcium
393,8825,52.3,Kannika,1998-02-24,Calcium


In [74]:
# Keep only the reading and its date: drop the identifier and the now-constant location/measure.
data_sudden_change_Kannika_Calcium = df_sudden_change_Kannika_Calcium.drop(
    columns=['id', 'location', 'measure']
)
data_sudden_change_Kannika_Calcium.head()

Unnamed: 0,value,sample date
41,67.19,1998-01-26
73,66.4,1998-01-26
105,67.2,1998-01-26
361,52.3,1998-02-24
393,52.3,1998-02-24


In [75]:
# Line chart of Calcium readings at Kannika; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Kannika_Calcium).mark_line().encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Calcium'),
).properties(title='Calcium at Kannika')

In [76]:
# Scatter of Calcium readings at Kannika; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Kannika_Calcium).mark_point(size=100).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Calcium'),
).properties(title='Calcium at Kannika')

There is no trend or pattern in the values of Calcium at Kannika, and we can also see that the value of Calcium varies a lot. There are a lot of anomalies in the plot, such as on 19-08-2004, when the amount of Calcium dropped abruptly.

Chart concatenation of Kannika and Chai for Calcium

In [77]:
# Stack Kannika (tomato) above Chai (default colour); ':T' gives both panels a temporal axis
# and the titles say which site each panel shows.
chart1=alt.Chart(data_sudden_change_Kannika_Calcium).mark_point(color="tomato",size=100).encode(
    x="sample date:T", y="value"
).properties(title="Calcium at Kannika")
chart2=alt.Chart(data_sudden_change_Chai_Calcium).mark_point(size=100).encode(
    x="sample date:T", y="value"
).properties(title="Calcium at Chai")
chart1 & chart2

Chart concatenation of Boonsri and Chai for Calcium

In [78]:
# Stack Boonsri (tomato) above Chai (default colour); ':T' gives both panels a temporal axis
# and the titles say which site each panel shows.
chart1=alt.Chart(data_sudden_change_Boonsri_Calcium).mark_point(color="tomato",size=100).encode(
    x="sample date:T", y="value"
).properties(title="Calcium at Boonsri")
chart2=alt.Chart(data_sudden_change_Chai_Calcium).mark_point(size=100).encode(
    x="sample date:T", y="value"
).properties(title="Calcium at Chai")
chart1 & chart2

Chart concatenation of Kannika and Boonsri for Calcium

In [79]:
# Stack Kannika (tomato) above Boonsri (default colour); ':T' gives both panels a temporal axis
# and the titles say which site each panel shows.
chart1=alt.Chart(data_sudden_change_Kannika_Calcium).mark_point(color="tomato",size=100).encode(
    x="sample date:T", y="value"
).properties(title="Calcium at Kannika")
chart2=alt.Chart(data_sudden_change_Boonsri_Calcium).mark_point(size=100).encode(
    x="sample date:T", y="value"
).properties(title="Calcium at Boonsri")
chart1 & chart2

1 For Boonsri Nitrites

In [80]:
# Select the Nitrites rows recorded at Boonsri and preview them.
df_sudden_change_Boonsri_Nitrites = df.loc[
    (df['measure'] == 'Nitrites') & (df['location'] == 'Boonsri')
]
df_sudden_change_Boonsri_Nitrites.head()

Unnamed: 0,id,value,location,sample date,measure
3,2228,0.01,Boonsri,1998-01-11,Nitrites
227,2268,0.03,Boonsri,1998-02-15,Nitrites
451,2308,0.03,Boonsri,1998-03-12,Nitrites
675,2348,0.06,Boonsri,1998-04-11,Nitrites
709,2711,0.05,Boonsri,1998-04-11,Nitrites


In [81]:
# Keep only the reading and its date: drop the identifier and the now-constant location/measure.
data_sudden_change_Boonsri_Nitrites = df_sudden_change_Boonsri_Nitrites.drop(
    columns=['id', 'location', 'measure']
)
data_sudden_change_Boonsri_Nitrites.head()

Unnamed: 0,value,sample date
3,0.01,1998-01-11
227,0.03,1998-02-15
451,0.03,1998-03-12
675,0.06,1998-04-11
709,0.05,1998-04-11


In [82]:
# Line chart of Nitrites readings at Boonsri; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Boonsri_Nitrites).mark_line().encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Nitrites'),
).properties(title='Nitrites at Boonsri')

In [83]:
# Scatter of Nitrites readings at Boonsri; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Boonsri_Nitrites).mark_point(size=100).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Nitrites'),
).properties(title='Nitrites at Boonsri')

The value of Nitrites at Boonsri is almost uniform, with very few anomalies, such as the one on 17-08-2000. It stays uniform most of the time.

2 For Chai Nitrites

In [84]:
# Select the Nitrites rows recorded at Chai and preview them.
df_sudden_change_Chai_Nitrites = df.loc[
    (df['measure'] == 'Nitrites') & (df['location'] == 'Chai')
]
df_sudden_change_Chai_Nitrites.head()

Unnamed: 0,id,value,location,sample date,measure
131,5055,0.03,Chai,1998-01-31,Nitrites
163,5539,0.02,Chai,1998-01-31,Nitrites
195,6017,0.03,Chai,1998-01-31,Nitrites
259,5094,0.02,Chai,1998-02-20,Nitrites
291,5578,0.02,Chai,1998-02-20,Nitrites


In [85]:
# Keep only the reading and its date: drop the identifier and the now-constant location/measure.
data_sudden_change_Chai_Nitrites = df_sudden_change_Chai_Nitrites.drop(
    columns=['id', 'location', 'measure']
)
data_sudden_change_Chai_Nitrites.head()

Unnamed: 0,value,sample date
131,0.03,1998-01-31
163,0.02,1998-01-31
195,0.03,1998-01-31
259,0.02,1998-02-20
291,0.02,1998-02-20


In [86]:
# Line chart of Nitrites readings at Chai; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Chai_Nitrites).mark_line().encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Nitrites'),
).properties(title='Nitrites at Chai')

In [87]:
# Scatter of Nitrites readings at Chai; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Chai_Nitrites).mark_point(size=100).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Nitrites'),
).properties(title='Nitrites at Chai')

The value of Nitrites at Chai varies a lot in a non-uniform manner, and there are also a lot of anomalies, such as on 13-08-2004, when it abruptly increased.

3 For Kannika Nitrites

In [88]:
# Select the Nitrites rows recorded at Kannika and preview them.
df_sudden_change_Kannika_Nitrites = df.loc[
    (df['measure'] == 'Nitrites') & (df['location'] == 'Kannika')
]
df_sudden_change_Kannika_Nitrites.head()

Unnamed: 0,id,value,location,sample date,measure
35,8298,0.03,Kannika,1998-01-26,Nitrites
67,8779,0.03,Kannika,1998-01-26,Nitrites
99,9262,0.03,Kannika,1998-01-26,Nitrites
355,8338,0.05,Kannika,1998-02-24,Nitrites
387,8819,0.05,Kannika,1998-02-24,Nitrites


In [89]:
# Keep only the reading and its date: drop the identifier and the now-constant location/measure.
data_sudden_change_Kannika_Nitrites = df_sudden_change_Kannika_Nitrites.drop(
    columns=['id', 'location', 'measure']
)
data_sudden_change_Kannika_Nitrites.head()

Unnamed: 0,value,sample date
35,0.03,1998-01-26
67,0.03,1998-01-26
99,0.03,1998-01-26
355,0.05,1998-02-24
387,0.05,1998-02-24


In [90]:
# Line chart of Nitrites readings at Kannika; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Kannika_Nitrites).mark_line().encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Nitrites'),
).properties(title='Nitrites at Kannika')

In [91]:
# Scatter of Nitrites readings at Kannika; ':T' forces a temporal (continuous time) axis for the dates.
alt.Chart(data_sudden_change_Kannika_Nitrites).mark_point(size=100).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Nitrites'),
).properties(title='Nitrites at Kannika')

The value of Nitrites at Kannika is uniform throughout, with very rare anomalies. It stays uniform and almost linear.

Chart Concatenation of Kannika and Chai

In [92]:
# Stack Kannika (tomato) above Chai (default colour); ':T' gives both panels a temporal axis
# and the titles say which site each panel shows.
chart1=alt.Chart(data_sudden_change_Kannika_Nitrites).mark_point(color="tomato",size=100).encode(
    x="sample date:T", y="value"
).properties(title="Nitrites at Kannika")
chart2=alt.Chart(data_sudden_change_Chai_Nitrites).mark_point(size=100).encode(
    x="sample date:T", y="value"
).properties(title="Nitrites at Chai")
chart1 & chart2

Chart Concatenation of Boonsri and Chai

In [93]:
# Stack Boonsri (tomato) above Chai (default colour); ':T' gives both panels a temporal axis
# and the titles say which site each panel shows.
chart1=alt.Chart(data_sudden_change_Boonsri_Nitrites).mark_point(color="tomato",size=100).encode(
    x="sample date:T", y="value"
).properties(title="Nitrites at Boonsri")
chart2=alt.Chart(data_sudden_change_Chai_Nitrites).mark_point(size=100).encode(
    x="sample date:T", y="value"
).properties(title="Nitrites at Chai")
chart1 & chart2

Chart Concatenation of Kannika and Boonsri

In [94]:
# Stack Kannika (tomato) above Boonsri (default colour); ':T' gives both panels a temporal axis
# and the titles say which site each panel shows.
chart1=alt.Chart(data_sudden_change_Kannika_Nitrites).mark_point(
    size=100,
    color='tomato'
).encode(
    x="sample date:T",
    y="value"
).properties(title="Nitrites at Kannika")
chart2=alt.Chart(data_sudden_change_Boonsri_Nitrites).mark_point(
    size=100
).encode(
    x="sample date:T",
    y="value"
).properties(title="Nitrites at Boonsri")
chart1 & chart2

In [95]:
df.head()  # reminder of the full frame's layout before building the multi-site views

Unnamed: 0,id,value,location,sample date,measure
0,2221,2.0,Boonsri,1998-01-11,Water temperature
1,2223,9.1,Boonsri,1998-01-11,Dissolved oxygen
2,2227,0.33,Boonsri,1998-01-11,Ammonium
3,2228,0.01,Boonsri,1998-01-11,Nitrites
4,2229,1.47,Boonsri,1998-01-11,Nitrates


Visualization for Ammonium at three Boonsri, Chai, Kannika together

In [96]:
# Ammonium readings at the three sites of interest. Uses the `locations` list defined earlier
# (Boonsri, Kannika, Chai) instead of repeating the three site names in a chain of ORs.
df_mixed = df[df['location'].isin(locations) & (df['measure'] == 'Ammonium')]

In [97]:
df_mixed.head()  # preview the combined Ammonium rows for the three sites

Unnamed: 0,id,value,location,sample date,measure
2,2227,0.33,Boonsri,1998-01-11,Ammonium
34,8297,0.3,Kannika,1998-01-26,Ammonium
66,8778,0.28,Kannika,1998-01-26,Ammonium
98,9261,0.27,Kannika,1998-01-26,Ammonium
130,5054,0.4,Chai,1998-01-31,Ammonium


In [98]:
# 'location' is kept because it drives the colour encoding; id and measure are not needed.
df_mixed = df_mixed.drop(columns=['id', 'measure'])
df_mixed.head()

Unnamed: 0,value,location,sample date
2,0.33,Boonsri,1998-01-11
34,0.3,Kannika,1998-01-26
66,0.28,Kannika,1998-01-26
98,0.27,Kannika,1998-01-26
130,0.4,Chai,1998-01-31


In [99]:
# All three sites on one chart, coloured by location; ':T' forces a temporal (continuous time) axis.
alt.Chart(df_mixed).mark_circle(size=200).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Ammonium'),
    color='location',
).properties(
    title='Values of Ammonium at "locations"'
)

Blue - Boonsri,
Yellow - Chai,
Red - Kannika

Here we can see that around 1998 the value of Ammonium at 'Chai' is much greater than at the other sites,
whereas around 2000 the value of Ammonium at 'Kannika' was much greater.

In [100]:
# Calcium readings at the three sites of interest. Uses the `locations` list defined earlier
# (Boonsri, Kannika, Chai) instead of repeating the three site names in a chain of ORs.
df_mixed = df[df['location'].isin(locations) & (df['measure'] == 'Calcium')]

In [101]:
# 'location' is kept because it drives the colour encoding; id and measure are not needed.
df_mixed = df_mixed.drop(columns=['id', 'measure'])
df_mixed.head()

Unnamed: 0,value,location,sample date
9,52.0,Boonsri,1998-01-11
41,67.19,Kannika,1998-01-26
73,66.4,Kannika,1998-01-26
105,67.2,Kannika,1998-01-26
137,52.6,Chai,1998-01-31


In [102]:
# All three sites on one chart, coloured by location; ':T' forces a temporal (continuous time) axis.
alt.Chart(df_mixed).mark_circle(size=200).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Calcium'),
    color='location',
).properties(
    title='Values of Calcium at "locations"'
)

In [103]:
# Nitrites readings at the three sites of interest. Uses the `locations` list defined earlier
# (Boonsri, Kannika, Chai) instead of repeating the three site names in a chain of ORs.
df_mixed = df[df['location'].isin(locations) & (df['measure'] == 'Nitrites')]

In [104]:
df_mixed.head()  # preview the combined Nitrites rows for the three sites

Unnamed: 0,id,value,location,sample date,measure
3,2228,0.01,Boonsri,1998-01-11,Nitrites
35,8298,0.03,Kannika,1998-01-26,Nitrites
67,8779,0.03,Kannika,1998-01-26,Nitrites
99,9262,0.03,Kannika,1998-01-26,Nitrites
131,5055,0.03,Chai,1998-01-31,Nitrites


In [105]:
# 'location' is kept because it drives the colour encoding; id and measure are not needed.
df_mixed = df_mixed.drop(columns=['id', 'measure'])
df_mixed.head()

Unnamed: 0,value,location,sample date
3,0.01,Boonsri,1998-01-11
35,0.03,Kannika,1998-01-26
67,0.03,Kannika,1998-01-26
99,0.03,Kannika,1998-01-26
131,0.03,Chai,1998-01-31


In [106]:
# All three sites on one pannable/zoomable chart, coloured by location;
# ':T' forces a temporal (continuous time) axis.
alt.Chart(df_mixed).mark_point(size=200).encode(
    x=alt.X('sample date:T', title='Sample date'),
    y=alt.Y('value:Q', title='Nitrites'),
    color='location',
).properties(
    title='Values of Nitrites at "locations"'
).interactive()

## Now we will group them according to dates and then calculate their mean values

In [107]:
# Reload the raw file, replacing the frame whose 'sample date' column was reformatted above;
# the dates are back in their original text form (e.g. '11-Jan-98').
df=pd.read_csv('Boonsong Lekagul waterways readings (2).csv')
df.head()

Unnamed: 0,id,value,location,sample date,measure
0,2221,2.0,Boonsri,11-Jan-98,Water temperature
1,2223,9.1,Boonsri,11-Jan-98,Dissolved oxygen
2,2227,0.33,Boonsri,11-Jan-98,Ammonium
3,2228,0.01,Boonsri,11-Jan-98,Nitrites
4,2229,1.47,Boonsri,11-Jan-98,Nitrates


## A function returning a dataframe that takes help of dates for statistical analysis of data

In [108]:
def groupbydatesandlocation(df,chemical,site):
    """Return the yearly mean reading of one measure at one location.

    Parameters
    ----------
    df : pandas.DataFrame
        Readings with at least the columns 'measure', 'location',
        'sample date' and 'value'.
    chemical : str
        Value of the 'measure' column to keep (e.g. 'Ammonium').
    site : str
        Value of the 'location' column to keep (e.g. 'Boonsri').

    Returns
    -------
    pandas.DataFrame
        Two columns: 'date' (the last day of each calendar year) and
        'value' (mean of that year's readings).
    """
    mask = (df['measure'] == chemical) & (df['location'] == site)
    # Keep only the date and the numeric reading. Leaving the text 'location'
    # column in the frame makes .mean() raise TypeError on pandas >= 2.0.
    readings = df.loc[mask, ['sample date', 'value']]
    yearly_mean = (
        readings
        .assign(date=pd.to_datetime(readings['sample date']))
        .drop(columns='sample date')
        .set_index('date')
        # YearEnd() is the offset behind the 'A' alias, which pandas 2.2 deprecates
        # in favour of 'YE'; the offset object works on both sides of that change.
        .resample(pd.offsets.YearEnd())
        .mean()
        .reset_index()
    )
    return yearly_mean

In [109]:
# Yearly mean Ammonium at Boonsri, tagged with its site so the frames can be combined later.
# The chart title now says "yearly mean", since the points are per-year averages, not raw readings.
df_Boonsri_ammonium = groupbydatesandlocation(df, 'Ammonium', 'Boonsri').assign(location="Boonsri")
chart_boonsri_mean = alt.Chart(df_Boonsri_ammonium).mark_circle(size=100, color='tomato').encode(
    x=alt.X('date:T', title='Year'),
    y=alt.Y('value:Q', title='Mean Ammonium'),
).properties(
    title='Yearly mean Ammonium at Boonsri'
)
chart_boonsri_mean

We can see that the average values of ammonium found per year first increased and then decreased with very few anomalies

In [113]:
# Yearly mean Ammonium at Chai, tagged with its site so the frames can be combined later.
# The chart title now says "yearly mean", since the points are per-year averages, not raw readings.
df_Chai_ammonium = groupbydatesandlocation(df, 'Ammonium', 'Chai').assign(location="Chai")
chart_chai_mean = alt.Chart(df_Chai_ammonium).mark_circle(size=100, color='tomato').encode(
    x=alt.X('date:T', title='Year'),
    y=alt.Y('value:Q', title='Mean Ammonium'),
).properties(
    title='Yearly mean Ammonium at Chai'
)
chart_chai_mean

We can see that the average values of Ammonium found per year at Chai continuously decreased, with very few anomalies

In [114]:
# Yearly mean Ammonium at Kannika, tagged with its site so the frames can be combined later.
# The chart title now says "yearly mean", since the points are per-year averages, not raw readings.
df_Kannika_ammonium = groupbydatesandlocation(df, 'Ammonium', 'Kannika').assign(location="Kannika")
chart_kannika_mean = alt.Chart(df_Kannika_ammonium).mark_circle(size=100).encode(
    x=alt.X('date:T', title='Year'),
    y=alt.Y('value:Q', title='Mean Ammonium'),
).properties(
    title='Yearly mean Ammonium at Kannika'
)
chart_kannika_mean

We can see that the average values of Ammonium found per year at Kannika continuously decreased, with a lot of anomalies

In [115]:
# `|` places the two charts side by side (horizontal concatenation).
chart_boonsri_mean | chart_kannika_mean

In [116]:
# `&` stacks the two charts one above the other (vertical concatenation).
chart_boonsri_mean & chart_kannika_mean

Chart Concatenation in Altair

In [117]:
df_Chai_ammonium.head()  # yearly means for Chai: one row per year-end date

Unnamed: 0,date,value,location
0,1998-12-31,0.425278,Chai
1,1999-12-31,0.34,Chai
2,2000-12-31,0.438056,Chai
3,2001-12-31,0.305667,Chai
4,2002-12-31,0.34746,Chai


In [118]:
df_Boonsri_ammonium.head()  # yearly means for Boonsri: one row per year-end date

Unnamed: 0,date,value,location
0,1998-12-31,0.169524,Boonsri
1,1999-12-31,0.295161,Boonsri
2,2000-12-31,0.391714,Boonsri
3,2001-12-31,0.227667,Boonsri
4,2002-12-31,0.451587,Boonsri


In [119]:
# Stack the yearly means of all three sites into one long frame (despite its name, the
# result also contains Kannika). ignore_index=True gives every row a unique label
# instead of repeating each input frame's 0..n index.
df_Boonsri_Chai_mean = pd.concat(
    [df_Boonsri_ammonium, df_Chai_ammonium, df_Kannika_ammonium],
    ignore_index=True,
)
df_Boonsri_Chai_mean.head()

Unnamed: 0,date,value,location
0,1998-12-31,0.169524,Boonsri
1,1999-12-31,0.295161,Boonsri
2,2000-12-31,0.391714,Boonsri
3,2001-12-31,0.227667,Boonsri
4,2002-12-31,0.451587,Boonsri


In [120]:
# Overlay the yearly means of the three sites on one chart, one colour per location.
alt.Chart(df_Boonsri_Chai_mean).mark_point(size=200).encode(
    alt.X('date'),
    alt.Y('value'),
    alt.Color('location'),
).properties(title='Mean Values of Ammonium at Boonsri Chai and Kannika')