In [2]:
import requests
import pandas as pd
import sys
from statistics import mean
from datetime import datetime
from time import sleep

In [2]:
def api_call(location, start_date, end_date):
    print('Fetching data for location: {} from {} to {}'.format(location, start_date, end_date))
    #Parameters Example
    # location = 'Thoai_Son' or '38.9697,-77.385'
    # start_date = '2022-03-18'
    # end_date = '2022-07-25'
    key='X4AWPL9FZRZ8BGWTF2QMBKPRW'
    response = requests.request("GET", "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{}/{}/{}?unitGroup=metric&include=days&key={}&contentType=json".format(location, start_date, end_date, key ))
    # while response.status_code == 429:
    #     print('Status Code: {}'.format(response.status_code))
    #     print('Request limit reached.. pausing for some time and attempting to resume afterwards')
    #     #sleep(60*60*3) # 3 hours # propably requires more
    #     sys.exit()
    #     response = requests.request("GET", "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{}/{}/{}?unitGroup=metric&include=days&key=X4AWPL9FZRZ8BGWTF2QMBKPRW&contentType=json".format(location, start_date, end_date ))
    if response.status_code != 200:
      print('Unexpected Status code: ', response.status_code)
      return None

    # Parse the results as JSON
    jsonData = response.json()
    return jsonData['days']

In [3]:
def range_constructor(num):
    '''Takes the number of days and splits it to to 14 sections'''
    part_size = num // 14
    ranges = []
    start = 0
    for _ in range(13):
        new_range = (start , start + part_size)
        ranges.append(new_range)
        start += part_size
    ranges.append((start, num))
    return ranges

In [4]:
def convert_to_seconds(string):
    '''Converts a time of day to seconds'''
    hours = int(string[0:2])
    mins = int(string[3:5])
    secs = int(string[6:8])
    return (hours*60+mins)*60+secs

In [5]:
def transform_to_row(pandas_dataframe):
    '''This function performs a form of aggregation where every column of a dataframe
    is split into 14 sections. Then an aggregation is performed in each section
    to produce one value. Returns a row of 14*number_of_columns elements'''
    result = []
    ranges = range_constructor(len(pandas_dataframe))
    #For these features, average is performed
    temp_list = []
    dew_list = []
    humidity_list = []
    sunlight_duration_list = []
    precip_list = []
    precipcover_list = []
    windgust_list = []
    windspeed_list = []
    pressure_list = []
    cloudcover_list = []
    solarradiation_list = []
    solarenergy_list = []
    uvindex_list = []
    
    #For these features, max or min is performed
    temp_max_list = []
    temp_min_list = []
    
    for r in ranges:
        target = pandas_dataframe.loc[r[0]:r[1]]
        temp_list.append(mean(target['temp']))
        dew_list.append(mean(target['dew']))
        humidity_list.append(mean(target['humidity']))
        sunlight_duration_list.append(mean(target['sunset']-target['sunrise']))
        precip_list.append(mean(target['precip']))
        precipcover_list.append(mean(target['precipcover']))
        windgust_list.append(mean(target['windgust']))
        windspeed_list.append(mean(target['windspeed']))
        pressure_list.append(mean(target['pressure']))
        cloudcover_list.append(mean(target['cloudcover']))
        solarradiation_list.append(mean(target['solarradiation']))
        solarenergy_list.append(mean(target['solarenergy']))
        uvindex_list.append(mean(target['uvindex']))
        
        temp_max_list.append(max(target['tempmax']))
        temp_min_list.append(min(target['tempmin']))
    
    result += temp_max_list
    result += temp_min_list
    
    result += temp_list
    result += dew_list
    result += humidity_list
    result += precip_list
    result += precipcover_list
    result += windgust_list
    result += windspeed_list
    result += pressure_list
    result += cloudcover_list
    result += solarradiation_list
    result += solarenergy_list
    result += uvindex_list
    result += sunlight_duration_list
    
    
    return result

In [6]:
data=pd.read_csv('C:/Users/Admin/Desktop/CE778/project/CSISA_IND_LDS_Rice_2018_Data.csv', sep=',')

#to change date format of column 'Date of Harvest'
to_date = lambda x: datetime.strptime(x, '%d-%m-%Y').date()
features=['tempmax', 'tempmin', 'temp', 'dew', 'humidity', 'precip',
          'precipcover', 'windgust', 'windspeed','pressure', 'cloudcover',
          'solarradiation', 'solarenergy', 'uvindex', 'sunrise', 'sunset']


  data=pd.read_csv('C:/Users/Admin/Desktop/CE778/project/CSISA_IND_LDS_Rice_2018_Data.csv', sep=',')


In [20]:
try:
    final_weather_data = pd.read_csv('C:/Users/Admin/Desktop/CE778/project/Weather_Data.csv')
    print('File Found, appending...')

#Else create new Dataframe
except FileNotFoundError:
    print('No file found.. Creating File..')
    Final_features = ['Latitude', 'Longitude', 'A-q117_season']
    for feat in (features[:-2] + ['Sunlight duration']):
        for week in range(1,15):
            Final_features.append('{} section {}'.format(feat, week))
            
    final_weather_data = pd.DataFrame(columns=Final_features)

#Checkpoint will be 0 if there is no file
checkpoint = len(final_weather_data)

File Found, appending...


In [21]:
fetched_data = {}
for row in data.values[checkpoint:]:
    lat = row[215]
    long = row[216]
    
    end_date = to_date(row[195])
    
    #Below I find or assume the date of seeding (assumption is explained on the report and presentaion)
    
    #'WS' case
    if row[3] == 'Rabi' :
        start_date = to_date('01-11-2018')
        
    #'SA' case
    else:
        temp = data[(data['Latitude']==lat) \
                    & (data['Longitude']==long) \
                    & (data['A-q117_season']=='Rabi')]
        #If there is a Date of harvest on the 'WS' season of the location and is after 1-04-2022 it is used
        #else 1-04-2022 is used as the date of seeding
        if len(temp) == 1:
            temp_date = to_date(temp.values[0][5])
            start_date = max(temp_date, to_date('01-04-2019'))
        else:
            start_date = to_date('01-04-2019')
            
    location  = str(lat) +',' +  str(long)
    fetced_data_key = (lat, long, row[3])
    fetced_row_data = api_call(location, start_date, end_date)
    if fetced_row_data == None : 
        if len(fetched_data) == 0 : sys.exit()
        break #Stop when no more data can be collected
    fetched_data[fetced_data_key] = fetced_row_data
    
if fetced_row_data!= None:
    print('All data has been collected... good job')

ValueError: time data '05-12-18' does not match format '%d-%m-%Y'

In [23]:
!pip install openmeteo-requests
!pip install requests-cache retry-requests numpy pandas

Collecting openmeteo-requests
  Downloading openmeteo_requests-1.2.0-py3-none-any.whl (5.5 kB)
Collecting openmeteo-sdk>=1.4.0
  Downloading openmeteo_sdk-1.11.4-py3-none-any.whl (12 kB)
Collecting flatbuffers>=24.0.0
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl (26 kB)
Installing collected packages: flatbuffers, openmeteo-sdk, openmeteo-requests
  Attempting uninstall: flatbuffers
    Found existing installation: flatbuffers 2.0.7
    Uninstalling flatbuffers-2.0.7:
      Successfully uninstalled flatbuffers-2.0.7
Successfully installed flatbuffers-24.3.25 openmeteo-requests-1.2.0 openmeteo-sdk-1.11.4
Collecting requests-cache
  Downloading requests_cache-1.2.0-py3-none-any.whl (61 kB)
Collecting retry-requests
  Downloading retry_requests-2.0.0-py3-none-any.whl (15 kB)
Collecting url-normalize>=1.4
  Downloading url_normalize-1.4.3-py2.py3-none-any.whl (6.8 kB)
Collecting cattrs>=22.2
  Downloading cattrs-23.2.3-py3-none-any.whl (57 kB)
Collecting exceptiongroup>=1.1.1
  Do

In [24]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 16.14,
	"longitude": 80.15,
	"start_date": "2018-04-02",
	"end_date": "2018-04-16",
	"daily": ["weather_code","temperature_2m_max","temperature_2m_min","apparent_temperature_max","apparent_temperature_min","precipitation_sum","rain_sum","snowfall_sum","precipitation_hours","sunrise","sunset","sunshine_duration","daylight_duration","wind_speed_10m_max","wind_gusts_10m_max","wind_direction_10m_dominant","shortwave_radiation_sum","et0_fao_evapotranspiration"]
}
responses = openmeteo.weather_api(url, params=params)
daily = responses[0].Daily()
daily_temperature_2m = daily.Variables(5).ValuesAsNumpy()
df = pd.DataFrame(daily_temperature_2m, columns=['Temperature_2m_max'])
print(df)

    Temperature_2m_max
0                  5.2
1                  3.4
2                  0.0
3                  0.0
4                  0.0
5                  0.0
6                  1.3
7                  0.2
8                  0.0
9                  0.1
10                 0.2
11                 0.0
12                 0.1
13                 0.0
14                 2.1


In [30]:
# CSV file path
import csv
from datetime import datetime
csv_file = "C:/Users/Admin/Desktop/CE778/project/sentinel1_rtc_new_2000_2.csv"
import pandas as pd
df = pd.read_csv(csv_file)
print(df['Latitude'].dtype)
# List to store the weather data for each field
weather_data_list = []

float64


In [13]:
t=1
import time
with open(csv_file, "r") as file:
    reader = csv.DictReader(file)
    for row in reader:
        latitude = float(row["Latitude"])
        longitude = float(row["Longitude"])
        sowing_date = datetime.strptime(row["Sowing"], "%d-%m-%y").strftime("%Y-%m-%d")
        harvest_date = datetime.strptime(row["Harvesting"], "%d-%m-%y").strftime("%Y-%m-%d")

        url = "https://archive-api.open-meteo.com/v1/archive"
    
        params = {
            "latitude": latitude,
            "longitude": longitude,
            "start_date": sowing_date,
            "end_date": harvest_date,
            "daily": ["temperature_2m_max", "temperature_2m_min",
                      "precipitation_sum", "precipitation_hours", "sunshine_duration",
                      "daylight_duration","et0_fao_evapotranspiration"]
        }

        responses = openmeteo.weather_api(url, params=params)
    
        
        daily = responses[0].Daily()

        temperature_2m_max = daily.Variables(0).ValuesAsNumpy().max()
        temperature_2m_min = daily.Variables(1).ValuesAsNumpy().min()
        precipitation_sum = daily.Variables(2).ValuesAsNumpy().sum()
        precipitation_hours = daily.Variables(3).ValuesAsNumpy().mean()
        sunshine_duration = daily.Variables(4).ValuesAsNumpy().mean()
        daylight_duration = daily.Variables(5).ValuesAsNumpy().mean()
        uvindex = daily.Variables(6).ValuesAsNumpy().mean()
        et0_fao_evapotranspiration = daily.Variables(7).ValuesAsNumpy().mean()

        field_data = {
            "latitude": latitude,
            "longitude": longitude,
            "temperature_2m_max": temperature_2m_max,
            "temperature_2m_min": temperature_2m_min,
            "precipitation_sum": precipitation_sum,
            "evapotranspiration": et0_fao_evapotranspiration,
            "precipitation_hours": precipitation_hours,
            "sunshine_duration": (sunshine_duration/3600),
            "daylight_duration": (daylight_duration/3600)
        }

        weather_data_list.append(field_data)

# Create a DataFrame from the weather data list
weather_data_df = pd.DataFrame(weather_data_list)
# Save the DataFrame to a CSV file
weather_data_df.to_csv("C:/Users/Admin/Desktop/CE778/project/ind_weather_data_2000_1.csv", index=False)


error: unpack_from requires a buffer of at least 4294965952 bytes for unpacking 4 bytes at offset 4294965948 (actual buffer size is 3672)

In [31]:
import csv
import pandas as pd
from datetime import datetime
import time

# Define the function to make API calls and process data
def process_data(csv_file):
    weather_data_list = []
    counter = 0
  # Define the batch size
    with open(csv_file, "r") as file:
        reader = csv.DictReader(file)
            
        for row in reader:
            if counter == 50:  # Check if the counter reaches 100
                print("Processed 50 rows. Pausing for 1 minute...")
                time.sleep(60)  # Pause for 2 minutes
                counter = 0  # Reset the counter to start processing the next 100 rows
            latitude = float(row["Latitude"])
            longitude = float(row["Longitude"])
            sowing_date = datetime.strptime(row["Sowing"], "%d-%m-%y").strftime("%Y-%m-%d")
            harvest_date = datetime.strptime(row["Harvesting"], "%d-%m-%y").strftime("%Y-%m-%d")

            url = "https://archive-api.open-meteo.com/v1/archive"

            params = {
                    "latitude": latitude,
                    "longitude": longitude,
                    "start_date": sowing_date,
                    "end_date": harvest_date,
                    "daily": ["temperature_2m_max", "temperature_2m_min", "apparent_temperature_max", "apparent_temperature_min",
                              "precipitation_sum", "precipitation_hours", "sunshine_duration",
                              "daylight_duration","et0_fao_evapotranspiration"]
                }

            responses = openmeteo.weather_api(url, params=params)
                
                # Process API response
            daily = responses[0].Daily()

            temperature_2m_max = daily.Variables(0).ValuesAsNumpy().max()
            temperature_2m_min = daily.Variables(1).ValuesAsNumpy().min()
            apparent_temperature_max = daily.Variables(2).ValuesAsNumpy().max()
            apparent_temperature_min = daily.Variables(3).ValuesAsNumpy().min()
            precipitation_sum = daily.Variables(4).ValuesAsNumpy().sum()
            precipitation_hours = daily.Variables(5).ValuesAsNumpy().mean()
            sunshine_duration = daily.Variables(6).ValuesAsNumpy().mean()
            daylight_duration = daily.Variables(7).ValuesAsNumpy().mean()
            et0_fao_evapotranspiration = daily.Variables(8).ValuesAsNumpy().mean()

            field_data = {
                    "Sowing":sowing_date,
                    "Harvesting":harvest_date,
                    "latitude": latitude,
                    "longitude": longitude,
                    "temperature_2m_max": temperature_2m_max,
                    "temperature_2m_min": temperature_2m_min,
                    "apparent_temperature_max": apparent_temperature_max,
                    "apparent_temperature_min": apparent_temperature_min,
                    "precipitation_sum": precipitation_sum,
                    "precipitation_hours": precipitation_hours,
                    "sunshine_duration": (sunshine_duration/3600),
                    "daylight_duration": (daylight_duration/3600),
                    "evapotranspiration": et0_fao_evapotranspiration
                }

            weather_data_list.append(field_data)
            counter = counter+1
            # Pause for a minute after processing each batch
#             if batch_num < num_batches - 1:
#                 time.sleep(60)
    
    return weather_data_list

# Call the function to process data
weather_data_list = process_data(csv_file)

# Create a DataFrame from the weather data list
weather_data_df = pd.DataFrame(weather_data_list)

# Save the DataFrame to a CSV file
weather_data_df.to_csv("C:/Users/Admin/Desktop/CE778/project/ind_weather_data_2000_2.csv", index=False)


Processed 50 rows. Pausing for 1 minute...
Processed 50 rows. Pausing for 1 minute...
Processed 50 rows. Pausing for 1 minute...
