In [ ]:
import requests
import pandas as pd

# Download the readings of sensor 10008 one day at a time and gather them
# into a single DataFrame whose values are all strings.
base_url = "http://agbc-fe.pdn.ac.lk/api/v1/data/?sensor=10008&date="

start_date = pd.to_datetime("2020-10-22")
end_date = pd.to_datetime("2020-11-07")

# One timestamp per calendar day, both endpoints included.
date_range = pd.date_range(start=start_date, end=end_date, freq="D")

all_data = []

for date in date_range:
    date_str = date.strftime("%Y-%m-%d")
    url = base_url + date_str
    try:
        # A timeout keeps one unresponsive request from hanging the whole run.
        response = requests.get(url, timeout=30)
        # Treat HTTP error statuses as failures instead of parsing their body.
        response.raise_for_status()
        # list() makes a non-iterable 'data' payload fail here, inside the try.
        day_rows = list(response.json()['data'])
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network/HTTP failure, a body that is not JSON, or JSON without a
        # usable 'data' entry: report the day and carry on (best effort).
        # Unlike a bare `except:`, this lets Ctrl-C and real bugs propagate.
        print(f"Error: Could not retrieve data for date {date_str}")
    else:
        all_data.extend(day_rows)


df = pd.DataFrame(all_data, dtype=str)
print(df.tail(5))

Error: Could not retrieve data for date 2020-10-31
      siteId  seqNo         date       time  temp1  temp2  temp3 humidity1  \
40494      0   2874   2020-11-07   23:57:33      ?   23.2   22.7         ?   
40495      0   2875   2020-11-07   23:58:01   22.6   23.2   22.7      99.9   
40496      0   2876   2020-11-07   23:58:33      ?   23.3   22.7         ?   
40497      0   2877   2020-11-07   23:59:03      ?   23.2   22.6         ?   
40498      0   2878   2020-11-07   23:59:33      ?   23.1   22.5         ?   

      humidity2 humidity3   light  
40494        95        95   1.042  
40495        95        95   1.042  
40496        95        95   1.042  
40497        95        95   1.042  
40498        95        95   1.042  


In [ ]:
# Count the null entries in every column and show the totals.
null_counts = df.isnull().sum()
print(null_counts)

siteId       0
seqNo        0
date         0
time         0
temp1        0
temp2        0
temp3        0
humidity1    0
humidity2    0
humidity3    0
light        0
dtype: int64


In [ ]:
# Remove, in place, every row that has at least one null field.
df.dropna(axis=0, how='any', inplace=True)

In [ ]:
# Keep only the first occurrence of each fully identical row
# (keeping the first occurrence is the drop_duplicates default).
df = df.drop_duplicates()

In [ ]:
import numpy as np
# Missing readings arrive as the literal string ' ?' (note the leading
# space); swap them for real NaN values so pandas treats them as missing.
df.replace(to_replace=' ?', value=np.nan, inplace=True)

print(df.tail(10))


      siteId  seqNo         date       time  temp1  temp2  temp3 humidity1  \
40489      0   2869   2020-11-07   23:55:03    NaN   23.2   22.6       NaN   
40490      0   2870   2020-11-07   23:55:33    NaN   23.2   22.7       NaN   
40491      0   2871   2020-11-07   23:56:03    NaN   23.2   22.7       NaN   
40492      0   2872   2020-11-07   23:56:33    NaN   23.3   22.7       NaN   
40493      0   2873   2020-11-07   23:57:01   22.6   23.2   22.7      99.9   
40494      0   2874   2020-11-07   23:57:33    NaN   23.2   22.7       NaN   
40495      0   2875   2020-11-07   23:58:01   22.6   23.2   22.7      99.9   
40496      0   2876   2020-11-07   23:58:33    NaN   23.3   22.7       NaN   
40497      0   2877   2020-11-07   23:59:03    NaN   23.2   22.6       NaN   
40498      0   2878   2020-11-07   23:59:33    NaN   23.1   22.5       NaN   

      humidity2 humidity3   light  
40489        95        95   1.042  
40490        95        95   1.042  
40491        95        95   1.042

In [ ]:
# Discard every row in which any column is NaN.
df = df.dropna(axis=0, how='any')


In [ ]:
# Cast the three temperature columns, the three humidity columns and the
# sequence number from strings to numbers; unparseable values become NaN.
temp_cols = ['temp1', 'temp2', 'temp3']
humidity_cols = ['humidity1', 'humidity2', 'humidity3']
for col in temp_cols + humidity_cols + ['seqNo']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Row-wise mean over the three sensors of each kind, ignoring NaN.
df['average_temp'] = df[temp_cols].mean(axis=1, skipna=True)
df['average_humidity'] = df[humidity_cols].mean(axis=1, skipna=True)

# Narrow view holding just the identifying columns, the averages and light.
new_df = df[['seqNo', 'date', 'time', 'average_temp', 'average_humidity', 'light']]

print(new_df)

       seqNo         date       time  average_temp  average_humidity   light
0          1   2020-10-22   00:00:01     23.433333         96.300000   1.042
2          3   2020-10-22   00:00:31     23.433333         96.266667   1.042
3          4   2020-10-22   00:00:46     23.466667         96.200000   1.042
4          5   2020-10-22   00:01:01     23.400000         96.166667   1.042
5          6   2020-10-22   00:01:16     23.433333         96.300000   1.042
...      ...          ...        ...           ...               ...     ...
40478   2858   2020-11-07   23:49:31     23.066667         96.633333   1.042
40486   2866   2020-11-07   23:53:31     22.900000         96.633333   1.042
40487   2867   2020-11-07   23:54:01     22.800000         96.633333   1.042
40493   2873   2020-11-07   23:57:01     22.833333         96.633333   1.042
40495   2875   2020-11-07   23:58:01     22.833333         96.633333   1.042

[25475 rows x 6 columns]


In [ ]:
# Collapse the readings into one row per (date, hour), averaging only the
# readings taken during the first minute of that hour (HH:00:SS).
#
# The previous hand-rolled loop tracked an "expected hour" counter that
# started at 6 and advanced on every mismatch. It therefore drifted away
# from the hours actually present in the data, emitted rows built from a
# single reading (or from nothing at all), and read `date`/`time` before the
# loop had assigned them. Grouping on the date and hour taken from each row
# avoids all of that and also copes with an empty frame.
#
# NOTE(review): the slices below assume 'time' looks like ' HH:MM:SS' with
# exactly one leading space - the same layout the original slicing relied
# on. Confirm against the API payload.
minute_part = df['time'].str[4:6]
top_of_hour = df[minute_part == "00"]
hour_part = top_of_hour['time'].str[1:3]

# sort=False keeps the groups in order of first appearance rather than
# sorting them by key.
new_rows = [
    {
        'Date': day,
        # Time of the last reading that contributed to the averages.
        'Time': readings['time'].iloc[-1],
        'average_temp': readings['average_temp'].mean(),
        'average_humidity': readings['average_humidity'].mean(),
    }
    for (day, _hour), readings in top_of_hour.groupby(
        [top_of_hour['date'], hour_part], sort=False)
]

# Explicit columns so the frame keeps its layout even when no row qualified.
new_df1 = pd.DataFrame(
    new_rows, columns=['Date', 'Time', 'average_temp', 'average_humidity'])

print(new_df1)




In [ ]:

# Write the hourly averages to data.csv, leaving out the index column.
new_df1.to_csv(path_or_buf='data.csv', index=False)

from datetime import datetime

# Unix time (whole seconds) for midnight at the start of the chosen day.
# The datetime is naive, so .timestamp() reads it as local time.
start_date = "2022-10-18"
start_datetime = datetime(*map(int, start_date.split("-")))
start_unix_time = int(start_datetime.timestamp())

In [ ]:
from datetime import datetime

# Unix time (whole seconds) for midnight at the start of the chosen day,
# printed for reference. The datetime is naive, so .timestamp() reads it as
# local time.
start_date = "2022-10-18"
start_datetime = datetime(*map(int, start_date.split("-")))
start_unix_time = int(start_datetime.timestamp())
print(start_unix_time)

1666031400


In [ ]:
from datetime import datetime

# Unix time (whole seconds) for midnight at the start of the end day,
# printed for reference. The datetime is naive, so .timestamp() reads it as
# local time.
end_date = "2022-10-19"
end_datetime = datetime(*map(int, end_date.split("-")))
end_unix_time = int(end_datetime.timestamp())
print(end_unix_time)

1666117800


In [ ]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib 

In [ ]:
# Load the weather table and keep the rows whose 'Date' falls after
# 2020-10-18 and on or before 2020-10-29. The comparison is done on the
# values as read from the CSV; ISO-formatted date strings order correctly.
df1 = pd.read_csv("weather_data.csv")
after_start = df1['Date'] > '2020-10-18'
up_to_end = df1['Date'] <= '2020-10-29'
df2 = df1.loc[after_start & up_to_end]
df2


Unnamed: 0,Date,Time,Temperature,Feels Like,Pressure,Humidity,Dew Point,Clouds,Wind Speed,Description
24,2020-10-19,00:00:00,22.64,23.44,1009,95,21.80,40,1.00,scattered clouds
25,2020-10-19,01:00:00,22.76,23.47,1010,91,21.21,96,1.73,overcast clouds
26,2020-10-19,02:00:00,24.92,25.69,1010,85,22.22,98,1.83,overcast clouds
27,2020-10-19,03:00:00,25.08,25.63,1009,76,20.55,40,2.60,scattered clouds
28,2020-10-19,04:00:00,25.74,26.59,1011,85,23.03,99,2.65,overcast clouds
...,...,...,...,...,...,...,...,...,...,...
283,2020-10-29,19:00:00,22.82,23.64,1012,95,21.98,94,0.66,overcast clouds
284,2020-10-29,20:00:00,22.18,22.96,1012,96,21.51,97,0.64,overcast clouds
285,2020-10-29,21:00:00,21.62,22.32,1010,95,20.78,40,0.00,scattered clouds
286,2020-10-29,22:00:00,21.29,22.01,1011,97,20.79,96,0.54,overcast clouds
