In [None]:
import pandas as pd
import numpy as np

In [None]:
# Colab-only step: mount Google Drive at /content/drive so that the CSV paths
# used by the cells below (all under /content/drive/MyDrive/...) resolve.
from google.colab import drive
# NOTE(review): force_remount=True presumably redoes the mount even when Drive
# is already mounted in this session — confirm against the Colab docs.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


# **Combining all of the dataframes**

In [None]:
# All three input tables live in the same Drive folder. Keeping that folder in
# one constant avoids repeating (and potentially mistyping) the long path.
TAIWAN_DATA_DIR = "/content/drive/MyDrive/Data 298B Project Data/Rice Image Datasets - with Location and Time/Rice Leaf Diseases - Taiwan Filtered"

# Per-image metadata; the merge cell below reads its Latitude, Longitude and Date columns.
metadata_df = pd.read_csv(f"{TAIWAN_DATA_DIR}/image_metadata_taiwan_filtered_location.csv")

# Weather table; joined to the metadata on (Latitude, Longitude, Date).
weather_df = pd.read_csv(f"{TAIWAN_DATA_DIR}/weather_data_taiwan.csv")

# MODIS remote-sensing table; joined to the metadata on (Latitude, Longitude, Date).
modis_df = pd.read_csv(f"{TAIWAN_DATA_DIR}/remote_sensing_modis_taiwan.csv")

In [None]:
# weather_df stores its coordinates rounded to 2 decimal places, so add
# 2-decimal helper columns to the image metadata (and to the MODIS table) that
# can serve as join keys against it.
rounded_key_sources = {'Rounded_Latitude': 'Latitude', 'Rounded_Longitude': 'Longitude'}
for table in (metadata_df, modis_df):
    for rounded_name, source_name in rounded_key_sources.items():
        table[rounded_name] = table[source_name].round(2)

# Normalise every 'Date' column to a YYYY-MM-DD string so the join keys compare equal.
for table in (metadata_df, weather_df, modis_df):
    table['Date'] = pd.to_datetime(table['Date']).dt.strftime('%Y-%m-%d')

# Left-join the weather rows onto the metadata: rounded metadata coordinates
# are matched against the weather table's own Latitude/Longitude, plus Date.
# Colliding weather columns receive the '_weather' suffix.
weather_join = metadata_df.merge(
    weather_df,
    how='left',
    left_on=['Rounded_Latitude', 'Rounded_Longitude', 'Date'],
    right_on=['Latitude', 'Longitude', 'Date'],
    suffixes=('', '_weather'),
)

# Discard the helper keys and the weather table's copies of the coordinates.
combined_df = weather_join.drop(
    columns=['Rounded_Latitude', 'Rounded_Longitude', 'Latitude_weather', 'Longitude_weather']
)

# Left-join the MODIS rows on the exact (unrounded) Latitude, Longitude and Date,
# then drop the rounded helper columns that modis_df brings into the result.
final_combined_df = (
    combined_df
    .merge(modis_df, how='left', on=['Latitude', 'Longitude', 'Date'], suffixes=('', '_modis'))
    .drop(columns=['Rounded_Latitude', 'Rounded_Longitude'])
)

In [None]:
# Preview the first rows of the merged table as a quick sanity check of the joins.
final_combined_df.head()

Unnamed: 0,Id,Latitude,Longitude,Date,Class,Date and Time,Avg Temp 14d,Avg Humidity 14d,Total Precipitation 14d,Avg Wind Speed 14d,NDVI MODIS,NDVI - 1 MODIS,NDVI - 2 MODIS,EVI MODIS,EVI - 1 MODIS,EVI - 2 MODIS
0,P_20181227_153331_vHDR_Auto.jpg,24.073258,120.661451,2018-12-27,Brown Spot,2018:12:27 15:33:31,19.328571,76.664286,5.7,29.171429,0.316,0.3335,0.2184,0.2176,0.1857,0.1328
1,P_20181227_153343_vHDR_Auto (1).jpg,24.073258,120.661451,2018-12-27,Brown Spot,2018:12:27 15:33:43,19.328571,76.664286,5.7,29.171429,0.316,0.3335,0.2184,0.2176,0.1857,0.1328
2,P_20181227_153711_vHDR_Auto.jpg,24.073297,120.661364,2018-12-27,Brown Spot,2018:12:27 15:37:11,19.328571,76.664286,5.7,29.171429,0.316,0.3335,0.2184,0.2176,0.1857,0.1328
3,P_20181227_153709_vHDR_Auto.jpg,24.073297,120.661364,2018-12-27,Brown Spot,2018:12:27 15:37:09,19.328571,76.664286,5.7,29.171429,0.316,0.3335,0.2184,0.2176,0.1857,0.1328
4,P_20181227_154446_vHDR_Auto (1).jpg,24.07435,120.661598,2018-12-27,Brown Spot,2018:12:27 15:44:46,19.328571,76.664286,5.7,29.171429,0.316,0.3335,0.2184,0.2176,0.1857,0.1328


In [None]:
# Print the column list with non-null counts and dtypes; a non-null count below
# the row count would indicate rows that found no weather/MODIS match.
final_combined_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 829 entries, 0 to 828
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Id                       829 non-null    object 
 1   Latitude                 829 non-null    float64
 2   Longitude                829 non-null    float64
 3   Date                     829 non-null    object 
 4   Class                    829 non-null    object 
 5   Date and Time            829 non-null    object 
 6   Avg Temp 14d             829 non-null    float64
 7   Avg Humidity 14d         829 non-null    float64
 8   Total Precipitation 14d  829 non-null    float64
 9   Avg Wind Speed 14d       829 non-null    float64
 10  NDVI MODIS               829 non-null    float64
 11  NDVI - 1 MODIS           829 non-null    float64
 12  NDVI - 2 MODIS           829 non-null    float64
 13  EVI MODIS                829 non-null    float64
 14  EVI - 1 MODIS            8

In [None]:
# Export the merged table to combined_data_taiwan.csv in the same Drive folder
# as the inputs. index=False keeps the DataFrame index out of the file.
final_combined_df.to_csv("/content/drive/MyDrive/Data 298B Project Data/Rice Image Datasets - with Location and Time/Rice Leaf Diseases - Taiwan Filtered/combined_data_taiwan.csv", index=False)

# **Adding Indicators Based on Thresholds**

In [None]:
# Load combined_data_taiwan.csv (the file written by the export cell above)
# into `df`, the frame the indicator cells below add columns to.
df = pd.read_csv("/content/drive/MyDrive/Data 298B Project Data/Rice Image Datasets - with Location and Time/Rice Leaf Diseases - Taiwan Filtered/combined_data_taiwan.csv")

In [None]:
# Disabled cell: everything below sits inside a triple-quoted string, so none of
# it executes. To enable the weather indicators, remove the surrounding quotes
# and tune the thresholds. Each indicator writes to its own column and reads a
# column name that exists in the combined table.
'''
# Indicators for weather data  # change thresholds for each column as needed

# Adding a threshold column based on "Avg Temp 14d"
df['Temp Threshold'] = np.where(df['Avg Temp 14d'] > 25, 1, 0)

# Adding a threshold column based on "Avg Humidity 14d"
df['Humidity Threshold'] = np.where(df['Avg Humidity 14d'] > 75, 1, 0)

# Adding a threshold column based on "Total Precipitation 14d"
df['Precipitation Threshold'] = np.where(df['Total Precipitation 14d'] > 50, 1, 0)

# Adding a threshold column based on "Avg Wind Speed 14d"
df['Wind Threshold'] = np.where(df['Avg Wind Speed 14d'] > 10, 1, 0)
'''

In [None]:
# Indicators for remote sensing data.
# Each entry is (indicator column to create, current-value column, comparison column).
# The indicator is 1 where the current value is strictly less than the
# comparison value, and 0 otherwise.
# NOTE(review): the "- 1" / "- 2" columns are presumably the readings one and
# two MODIS periods earlier — confirm against how the MODIS table was built.
decrease_specs = [
    ('NDVI 1 Decrease', 'NDVI MODIS', 'NDVI - 1 MODIS'),
    ('NDVI 2 Decrease', 'NDVI MODIS', 'NDVI - 2 MODIS'),
    ('EVI 1 Decrease', 'EVI MODIS', 'EVI - 1 MODIS'),
    ('EVI 2 Decrease', 'EVI MODIS', 'EVI - 2 MODIS'),
]

for indicator_col, current_col, comparison_col in decrease_specs:
    df[indicator_col] = np.where(df[current_col] < df[comparison_col], 1, 0)

In [None]:
# Preview the first rows to check the indicator columns added above.
df.head()

Unnamed: 0,Id,Latitude,Longitude,Date,Class,Date and Time,Avg Temp 14d,Avg Humidity 14d,Total Precipitation 14d,Avg Wind Speed 14d,...,NDVI - 1 MODIS,NDVI - 2 MODIS,EVI MODIS,EVI - 1 MODIS,EVI - 2 MODIS,Humidity Threshold,NDVI 1 Decrease,NDVI 2 Decrease,EVI 1 Decrease,EVI 2 Decrease
0,P_20181227_153331_vHDR_Auto.jpg,24.073258,120.661451,2018-12-27,Brown Spot,2018:12:27 15:33:31,19.328571,76.664286,5.7,29.171429,...,0.3335,0.2184,0.2176,0.1857,0.1328,1,1,0,0,0
1,P_20181227_153343_vHDR_Auto (1).jpg,24.073258,120.661451,2018-12-27,Brown Spot,2018:12:27 15:33:43,19.328571,76.664286,5.7,29.171429,...,0.3335,0.2184,0.2176,0.1857,0.1328,1,1,0,0,0
2,P_20181227_153711_vHDR_Auto.jpg,24.073297,120.661364,2018-12-27,Brown Spot,2018:12:27 15:37:11,19.328571,76.664286,5.7,29.171429,...,0.3335,0.2184,0.2176,0.1857,0.1328,1,1,0,0,0
3,P_20181227_153709_vHDR_Auto.jpg,24.073297,120.661364,2018-12-27,Brown Spot,2018:12:27 15:37:09,19.328571,76.664286,5.7,29.171429,...,0.3335,0.2184,0.2176,0.1857,0.1328,1,1,0,0,0
4,P_20181227_154446_vHDR_Auto (1).jpg,24.07435,120.661598,2018-12-27,Brown Spot,2018:12:27 15:44:46,19.328571,76.664286,5.7,29.171429,...,0.3335,0.2184,0.2176,0.1857,0.1328,1,1,0,0,0


In [None]:
#df.to_csv("/content/drive/MyDrive/Data 298B Project Data/Rice Image Datasets - with Location and Time/Rice Leaf Diseases - Taiwan Filtered/combined_data_taiwan_with_indicators.csv", index=False)

In [None]:
#metadata_df = pd.read_csv("/content/drive/MyDrive/Data 298B Project Data/Rice Image Datasets - with Location and Time/Rice Leaf Diseases - Taiwan Filtered/image_metadata_taiwan_filtered_location.csv")

# Replacing 'Rice Blast' with 'Blast' in the 'Class' column
#metadata_df['Class'] = metadata_df['Class'].replace('Rice Blast', 'Blast')

#metadata_df.to_csv("/content/drive/MyDrive/Data 298B Project Data/Rice Image Datasets - with Location and Time/Rice Leaf Diseases - Taiwan Filtered/image_metadata_taiwan_filtered_location_new_class_names_2.csv", index=False)
