In [140]:
import requests
import pandas as pd

base_url = "http://agbc-fe.pdn.ac.lk/api/v1/data/?sensor=10008&date="

start_date = pd.to_datetime("2020-10-17")
end_date = pd.to_datetime("2020-10-30")

date_range = pd.date_range(start=start_date, end=end_date, freq="D")

all_data = []

for date in date_range:
    date_str = date.strftime("%Y-%m-%d")
    url = base_url + date_str
    response = requests.get(url)
    data = response.json()
    all_data.extend(data['data'])

df = pd.DataFrame(all_data, dtype=str)
print(df.tail())

      siteId  seqNo         date       time  temp1  temp2  temp3 humidity1  \
41841      0   1667   2020-10-29   13:54:31   32.5   34.3   32.7      66.7   
41842      0   1668   2020-10-29   13:55:01   32.5   34.5   32.7      66.3   
41843      0   1669   2020-10-29   13:55:31   32.5   34.3   32.5      65.2   
41844      0   1670   2020-10-29   13:56:03      ?   34.1   32.3         ?   
41845      0   1671   2020-10-29   13:56:30   32.4     34   32.2        66   

      humidity2 humidity3     light  
41841        56        61   9714.17  
41842        56        61      6820  
41843        54        61   6175.83  
41844        56        63   6005.42  
41845        56        61   6124.58  


In [141]:
# check for missing values
print(df.isnull().sum())

siteId       0
seqNo        0
date         0
time         0
temp1        0
temp2        0
temp3        0
humidity1    0
humidity2    0
humidity3    0
light        0
dtype: int64


In [142]:
# drop rows with missing values
df.dropna(inplace=True)

In [143]:
# Drop duplicate rows
df=df.drop_duplicates(keep='first')

In [144]:
import numpy as np
# Replace '?' with NaN

df.replace(' ?', np.nan, inplace=True)
# Drop rows containing missing values
# Replace the missing values with NaN
df = df.fillna(np.nan)
# Drop rows containing missing values
# Replace the missing values with NaN

#df = df.replace(['?', 'NA', 'N/A', 'None'],0)

df.tail()


Unnamed: 0,siteId,seqNo,date,time,temp1,temp2,temp3,humidity1,humidity2,humidity3,light
41841,0,1667,2020-10-29,13:54:31,32.5,34.3,32.7,66.7,56,61,9714.17
41842,0,1668,2020-10-29,13:55:01,32.5,34.5,32.7,66.3,56,61,6820.0
41843,0,1669,2020-10-29,13:55:31,32.5,34.3,32.5,65.2,54,61,6175.83
41844,0,1670,2020-10-29,13:56:03,,34.1,32.3,,56,63,6005.42
41845,0,1671,2020-10-29,13:56:30,32.4,34.0,32.2,66.0,56,61,6124.58


In [145]:
# Drop rows containing missing values
df = df.dropna()
df.head()

Unnamed: 0,siteId,seqNo,date,time,temp1,temp2,temp3,humidity1,humidity2,humidity3,light
0,0,0,2020-10-17,05:30:16,18.7,19.2,18.8,96.3,93,93,1.25
1,0,1,2020-10-17,05:30:31,18.8,19.2,18.8,96.4,93,93,1.25
3,0,3,2020-10-17,05:31:01,18.7,19.2,18.9,96.8,93,93,1.25
4,0,4,2020-10-17,05:31:16,18.7,19.2,18.8,96.1,93,93,1.25
9,0,9,2020-10-17,05:32:31,18.7,19.2,18.8,95.8,93,93,1.25


In [146]:
# Convert temperature columns to numeric
df['temp1'] = pd.to_numeric(df['temp1'], errors='coerce')
df['temp2'] = pd.to_numeric(df['temp2'], errors='coerce')
df['temp3'] = pd.to_numeric(df['temp3'], errors='coerce')

# Convert temperature columns to numeric
df['humidity1'] = pd.to_numeric(df['humidity1'], errors='coerce')
df['humidity2'] = pd.to_numeric(df['humidity2'], errors='coerce')
df['humidity3'] = pd.to_numeric(df['humidity3'], errors='coerce')

df['seqNo'] = pd.to_numeric(df['seqNo'], errors='coerce')

# Calculate the average temperature
df['average_temp'] = df[['temp1', 'temp2', 'temp3']].mean(axis=1)

# Calculate the average humidity
df['average_humidity'] = df[['humidity1', 'humidity2', 'humidity3']].mean(axis=1)

# Create a new DataFrame with only the desired columns
new_df = df[['seqNo','date','time','average_temp', 'average_humidity', 'light']]

# Print the selected columns
print(new_df)

       seqNo         date       time  average_temp  average_humidity     light
0          0   2020-10-17   05:30:16     18.900000         94.100000      1.25
1          1   2020-10-17   05:30:31     18.933333         94.133333      1.25
3          3   2020-10-17   05:31:01     18.933333         94.266667      1.25
4          4   2020-10-17   05:31:16     18.900000         94.033333      1.25
9          9   2020-10-17   05:32:31     18.900000         93.933333      1.25
...      ...          ...        ...           ...               ...       ...
41840   1666   2020-10-29   13:54:01     32.633333         63.800000   10159.2
41841   1667   2020-10-29   13:54:31     33.166667         61.233333   9714.17
41842   1668   2020-10-29   13:55:01     33.233333         61.100000      6820
41843   1669   2020-10-29   13:55:31     33.100000         60.066667   6175.83
41845   1671   2020-10-29   13:56:30     32.866667         61.000000   6124.58

[27245 rows x 6 columns]


In [153]:
new_rows = []  # List to store the combined rows
x=0
count=0
average_temp = 0
average_humidity = 0
light = 0
n=6
rowcount=0
for index, row in df.iterrows():
    # Access the values of each column in the current row
    time1 = row['time']

    sliced_text = time1[4:6]  #sliced the time xx:yy:xx
  
    
    if(sliced_text=="00"): #if yy==00
        sliced_text1 = time1[1:6] # get front slice xx:xx:
        if(n>23):
            n=0
            
        
            
    
        if(n <= 9 and sliced_text1 == "0" + str(n) + ":00" ):
            
        
         # Access the values of the current row
            date = row['date']
            time = row['time']
            average_temp = row['average_temp']+average_temp
            average_humidity = row['average_humidity']+average_humidity
            x=row['seqNo']
            count=count+1


        elif( sliced_text1 == str(n) + ":00" ):
            
            
            
         # Access the values of the current row
            date = row['date']
            time = row['time']
            average_temp = row['average_temp']+average_temp
            average_humidity = row['average_humidity']+average_humidity
            x=row['seqNo']
            count=count+1
            
       
        
        



        


        
        else:
            if (count==0 ):

                count=1
       
            
            new_row = {
            'date': date,
            'time': time,
            'average_temp': average_temp/count ,
            'average_humidity': average_humidity/count ,
             }
            #print(new_row)
            new_rows.append(new_row)
            
            count=1
            n=n+1
            
    
            
            
            
            date = row['date']
            time = row['time']
            average_temp = row['average_temp']
            average_humidity = row['average_humidity']

new_row = {
            'date': date,
            'time': time,
            'average_temp': average_temp/count ,
            'average_humidity': average_humidity/count ,
             }
new_rows.append(new_row)
new_df1 = pd.DataFrame(new_rows)
print(new_df1.head(30))



        
            

 
       
    
        


           date       time  average_temp  average_humidity
0    2020-10-18   06:00:46     21.288889         94.622222
1    2020-10-18   07:00:00     22.500000         94.400000
2    2020-10-18   08:00:46     30.125000         69.733333
3    2020-10-18   09:00:31     29.416667         76.716667
4    2020-10-18   10:00:46     36.900000         46.283333
5    2020-10-18   11:00:45     39.800000         41.383333
6    2020-10-18   12:00:46     32.958333         54.591667
7    2020-10-18   13:00:46     38.775000         41.483333
8    2020-10-18   14:00:46     41.058333         37.033333
9    2020-10-18   15:00:46     35.483333         48.158333
10   2020-10-18   16:00:46     32.841667         58.366667
11   2020-10-18   17:00:46     30.150000         69.550000
12   2020-10-18   18:00:46     26.133333         91.116667
13   2020-10-18   19:00:46     24.633333         94.633333
14   2020-10-18   20:00:46     24.122222         95.288889
15   2020-10-18   21:00:01     23.433333         95.9333

from datetime import datetime

start_date = "2022-10-18"
start_datetime = datetime.strptime(start_date, "%Y-%m-%d")
start_unix_time = int(start_datetime.timestamp())

In [148]:
from datetime import datetime

start_date = "2022-10-18"
start_datetime = datetime.strptime(start_date, "%Y-%m-%d")
start_unix_time = int(start_datetime.timestamp())
print(start_unix_time)

1666031400


In [149]:
from datetime import datetime

end_date = "2022-10-19"
end_datetime = datetime.strptime(end_date, "%Y-%m-%d")
end_unix_time = int(end_datetime.timestamp())
print(end_unix_time)

1666117800
