In [483]:
import requests
import pandas as pd

base_url = "http://agbc-fe.pdn.ac.lk/api/v1/data/?sensor=10008&date="

start_date = pd.to_datetime("2020-10-17")
end_date = pd.to_datetime("2020-10-29")

date_range = pd.date_range(start=start_date, end=end_date, freq="D")

all_data = []

for date in date_range:
    date_str = date.strftime("%Y-%m-%d")
    url = base_url + date_str
    response = requests.get(url)
    data = response.json()
    all_data.extend(data['data'])

df = pd.DataFrame(all_data, dtype=str)
print(df.tail(5))

      siteId  seqNo         date       time  temp1  temp2  temp3 humidity1  \
41841      0   1667   2020-10-29   13:54:31   32.5   34.3   32.7      66.7   
41842      0   1668   2020-10-29   13:55:01   32.5   34.5   32.7      66.3   
41843      0   1669   2020-10-29   13:55:31   32.5   34.3   32.5      65.2   
41844      0   1670   2020-10-29   13:56:03      ?   34.1   32.3         ?   
41845      0   1671   2020-10-29   13:56:30   32.4     34   32.2        66   

      humidity2 humidity3     light  
41841        56        61   9714.17  
41842        56        61      6820  
41843        54        61   6175.83  
41844        56        63   6005.42  
41845        56        61   6124.58  


In [484]:
# check for missing values
print(df.isnull().sum())

siteId       0
seqNo        0
date         0
time         0
temp1        0
temp2        0
temp3        0
humidity1    0
humidity2    0
humidity3    0
light        0
dtype: int64


In [485]:
# drop rows with missing values
df.dropna(inplace=True)

In [486]:
# Drop duplicate rows
df=df.drop_duplicates(keep='first')

In [487]:
import numpy as np
# Replace '?' with NaN

df.replace(' ?', np.nan, inplace=True)
# Drop rows containing missing values
# Replace the missing values with NaN
df = df.fillna(np.nan)
# Drop rows containing missing values
# Replace the missing values with NaN

#df = df.replace(['?', 'NA', 'N/A', 'None'],0)


print(df.tail(10))


      siteId  seqNo         date       time  temp1  temp2  temp3 humidity1  \
41836      0   1662   2020-10-29   13:52:01     32   33.2   31.5      65.2   
41837      0   1663   2020-10-29   13:52:31     32   33.2   31.4      68.1   
41838      0   1664   2020-10-29   13:53:01   31.9   33.2   31.5      66.1   
41839      0   1665   2020-10-29   13:53:31   31.9   33.2   31.5      65.9   
41840      0   1666   2020-10-29   13:54:01   32.2   33.6   32.1      66.4   
41841      0   1667   2020-10-29   13:54:31   32.5   34.3   32.7      66.7   
41842      0   1668   2020-10-29   13:55:01   32.5   34.5   32.7      66.3   
41843      0   1669   2020-10-29   13:55:31   32.5   34.3   32.5      65.2   
41844      0   1670   2020-10-29   13:56:03    NaN   34.1   32.3       NaN   
41845      0   1671   2020-10-29   13:56:30   32.4     34   32.2        66   

      humidity2 humidity3     light  
41836        57        64   5610.42  
41837        58        67   5136.25  
41838        58        64  

In [488]:
# Drop rows containing missing values
df = df.dropna()


In [489]:
# Convert temperature columns to numeric
df['temp1'] = pd.to_numeric(df['temp1'], errors='coerce')
df['temp2'] = pd.to_numeric(df['temp2'], errors='coerce')
df['temp3'] = pd.to_numeric(df['temp3'], errors='coerce')

# Convert temperature columns to numeric
df['humidity1'] = pd.to_numeric(df['humidity1'], errors='coerce')
df['humidity2'] = pd.to_numeric(df['humidity2'], errors='coerce')
df['humidity3'] = pd.to_numeric(df['humidity3'], errors='coerce')

df['seqNo'] = pd.to_numeric(df['seqNo'], errors='coerce')

# Calculate the average temperature
df['average_temp'] = df[['temp1', 'temp2', 'temp3']].mean(axis=1)

# Calculate the average humidity
df['average_humidity'] = df[['humidity1', 'humidity2', 'humidity3']].mean(axis=1)

# Create a new DataFrame with only the desired columns
new_df = df[['seqNo','date','time','average_temp', 'average_humidity', 'light']]

# Print the selected columns
print(new_df)

       seqNo         date       time  average_temp  average_humidity     light
0          0   2020-10-17   05:30:16     18.900000         94.100000      1.25
1          1   2020-10-17   05:30:31     18.933333         94.133333      1.25
3          3   2020-10-17   05:31:01     18.933333         94.266667      1.25
4          4   2020-10-17   05:31:16     18.900000         94.033333      1.25
9          9   2020-10-17   05:32:31     18.900000         93.933333      1.25
...      ...          ...        ...           ...               ...       ...
41840   1666   2020-10-29   13:54:01     32.633333         63.800000   10159.2
41841   1667   2020-10-29   13:54:31     33.166667         61.233333   9714.17
41842   1668   2020-10-29   13:55:01     33.233333         61.100000      6820
41843   1669   2020-10-29   13:55:31     33.100000         60.066667   6175.83
41845   1671   2020-10-29   13:56:30     32.866667         61.000000   6124.58

[27245 rows x 6 columns]


In [507]:
new_rows = []  # List to store the combined rows
x=0
count=0
average_temp = 0
average_humidity = 0
light = 0
n=6
rowcount=0
start=0
start_date=18
for index, row in df.iterrows():
    # Access the values of each column in the current row
    time1 = row['time']
    date1=row['date']
    sliced_date=date1[9:11] 
    

   
       

    sliced_text = time1[4:6]  #sliced the time xx:yy:xx
    

    
    if(sliced_text=="00"):
       
       
        #if yy==00
        sliced_text1 = time1[1:6] # get front slice xx:xx:
        

        if(start_date!=int(sliced_date)):
            
            start_date=int(sliced_date)
            
            start=0
            
           
        

    
        if(start==0):
            sliced_hour = time1[1:3]
            n=int(sliced_hour)
            start=1















        if(n>23):
            n=0
            
        
            
    
        if(n <= 9 and sliced_text1 == "0" + str(n) + ":00" ):
            
        
         # Access the values of the current row
            date = row['date']
            time = row['time']
            average_temp = row['average_temp']+average_temp
            average_humidity = row['average_humidity']+average_humidity
            x=row['seqNo']
            count=count+1


        elif( sliced_text1 == str(n) + ":00" ):
            
            
            
         # Access the values of the current row
            date = row['date']
            time = row['time']
            average_temp = row['average_temp']+average_temp
            average_humidity = row['average_humidity']+average_humidity
            x=row['seqNo']
            count=count+1
            
       
        
        



        


        
        else:
            if (count==0 ):

                count=1
       
            
            new_row = {
            'Date': date,
            'Time': time,
            'average_temp': average_temp/count ,
            'average_humidity': average_humidity/count ,
             }
            #print(new_row)
            new_rows.append(new_row)
            
            count=1
            n=n+1
            
    
            
            
            
            date = row['date']
            time = row['time']
            average_temp = row['average_temp']
            average_humidity = row['average_humidity']

new_row = {
            'Date': date,
            'Time': time,
            'average_temp': average_temp/count ,
            'average_humidity': average_humidity/count ,
             }
new_rows.append(new_row)
new_df1 = pd.DataFrame(new_rows)

print(new_df1.tail(60))


        
            

 
       
    
     


            Date       Time  average_temp  average_humidity
181   2020-10-26   12:00:31     39.666667         41.733333
182   2020-10-26   13:00:30     33.516667         62.533333
183   2020-10-26   14:00:31     33.616667         60.366667
184   2020-10-26   15:00:31     33.950000         59.066667
185   2020-10-26   16:00:00     31.166667         73.633333
186   2020-10-26   17:00:31     27.966667         92.300000
187   2020-10-26   18:00:31     25.533333         95.566667
188   2020-10-26   20:00:31     24.500000         96.633333
189   2020-10-27   01:00:31     24.316667         96.633333
190   2020-10-27   02:00:01     23.933333         96.633333
191   2020-10-27   04:00:01     23.400000         96.633333
192   2020-10-27   06:00:00     22.733333         96.633333
193   2020-10-27   06:00:30     22.700000         96.633333
194   2020-10-27   08:00:00     27.933333         93.200000
195   2020-10-27   08:00:31     27.900000         93.533333
196   2020-10-27   09:00:00     31.50000

from datetime import datetime

start_date = "2022-10-18"
start_datetime = datetime.strptime(start_date, "%Y-%m-%d")
start_unix_time = int(start_datetime.timestamp())

In [491]:
from datetime import datetime

start_date = "2022-10-18"
start_datetime = datetime.strptime(start_date, "%Y-%m-%d")
start_unix_time = int(start_datetime.timestamp())
print(start_unix_time)

1666031400


In [492]:
from datetime import datetime

end_date = "2022-10-19"
end_datetime = datetime.strptime(end_date, "%Y-%m-%d")
end_unix_time = int(end_datetime.timestamp())
print(end_unix_time)

1666117800


In [493]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib 

In [494]:
df1 = pd.read_csv("weather_data.csv")
df1
df2 = df1[(df1['Date'] > '2020-10-18') & (df1['Date'] <= '2020-10-29')]
df2


Unnamed: 0,Date,Time,Temperature,Feels Like,Pressure,Humidity,Dew Point,Clouds,Wind Speed,Description
24,2020-10-19,00:00:00,22.64,23.44,1009,95,21.80,40,1.00,scattered clouds
25,2020-10-19,01:00:00,22.76,23.47,1010,91,21.21,96,1.73,overcast clouds
26,2020-10-19,02:00:00,24.92,25.69,1010,85,22.22,98,1.83,overcast clouds
27,2020-10-19,03:00:00,25.08,25.63,1009,76,20.55,40,2.60,scattered clouds
28,2020-10-19,04:00:00,25.74,26.59,1011,85,23.03,99,2.65,overcast clouds
...,...,...,...,...,...,...,...,...,...,...
283,2020-10-29,19:00:00,22.82,23.64,1012,95,21.98,94,0.66,overcast clouds
284,2020-10-29,20:00:00,22.18,22.96,1012,96,21.51,97,0.64,overcast clouds
285,2020-10-29,21:00:00,21.62,22.32,1010,95,20.78,40,0.00,scattered clouds
286,2020-10-29,22:00:00,21.29,22.01,1011,97,20.79,96,0.54,overcast clouds
