In [None]:
import pandas as pd
import os

# Directory containing the CSV files
directory = 'forest_fire_data'  # Forest survey of India (FSI) dosn't allow to download all at once, this folder has the files which are 
                                # downloaded manually in parts

# List to store all dataframes
dfs = []

# Iterate over each file in the directory
for filename in os.listdir(directory):
    if filename.endswith('.csv'):  # Assuming all files are in .csv format
        file_path = os.path.join(directory, filename)
        try:
            # Read the CSV file into a dataframe
            df = pd.read_csv(file_path, encoding='latin1')
            # Append the dataframe to the list
            dfs.append(df)
        except Exception as e:
            print(f"Error reading {filename}: {e}")

# Concatenate all dataframes into one
combined_df = pd.concat(dfs, ignore_index=True)

# Write the combined dataframe to a new CSV file
combined_df.to_csv('combined_forest_fire_data.csv', index=False)


In [7]:
import pandas as pd

# Read the combined CSV file into a dataframe
combined_df = pd.read_csv('combined_forest_fire_data.csv')

# Convert the 'firedate' column to datetime format with the correct format
combined_df['firedate'] = pd.to_datetime(combined_df['firedate'], format='%d-%m-%Y')

# Sort the dataframe by the 'firedate' column in ascending order
combined_df_sorted = combined_df.sort_values(by='firedate')

# Write the sorted dataframe to a new CSV file
combined_df_sorted.to_csv('combined_forest_fire_data_sorted.csv', index=False)


In [None]:
import pandas as pd

# Replace with the path to your CSV file
csv_file_path = 'combined_forest_fire_data_sorted.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Add a new column with a constant value of 1
df['fire'] = 1

# Select the specified columns
df_filtered = df[['latitude', 'longitude', 'firedate','fire']]

df_filtered.to_csv("fire_points_india.csv")



                   uniqueid    firedate  firetime sourcetype  longitude  \
0  TUNEMTRNTF8yMDAyMTEuY3N2  2002-11-01  14:05:00      MODIS     74.855   
1  TUNEMTRNTF8yMDAyMTEuY3N2  2002-11-01  12:23:00      MODIS     94.061   
2  TUNEMTRNTF8yMDAyMTEuY3N2  2002-11-01  12:23:00      MODIS     94.061   
3  TUNEMTRNTF8yMDAyMTEuY3N2  2002-11-01  14:04:00      MODIS     76.413   
4  TUNEMTRNTF8yMDAyMTEuY3N2  2002-11-01  14:04:00      MODIS     76.408   

   latitude       longdeg        latdeg              state     district  \
0    34.698  74° 51' 18"E  34° 41' 52"N  JAMMU AND KASHMIR    BARAMULLA   
1    24.810  94° 03' 39"E  24° 48' 35"N            MANIPUR  IMPHAL EAST   
2    24.810  94° 03' 39"E  24° 48' 35"N            MANIPUR  IMPHAL EAST   
3    30.213  76° 24' 46"E  30° 12' 46"N             PUNJAB      PATIALA   
4    30.249  76° 24' 28"E  30° 14' 56"N             PUNJAB      PATIALA   

                  circle          division           rangename  \
0                    NaN        

In [7]:
import numpy as np
import pandas as pd

# Define the target latitude and longitude ranges
target_lat = np.arange(6.5, 38.5, 0.25)
target_lon = np.arange(66.5, 100, 0.25)

# Create meshgrid
lon_grid, lat_grid = np.meshgrid(target_lon, target_lat)

# Flatten and create DataFrame
grid_df = pd.DataFrame({
    'lat': lat_grid.ravel(),
    'lon': lon_grid.ravel()
})

# Save to CSV
grid_df.to_csv('grid_india_0.125.csv', index=False)

# Optional: print the shape
print("Number of rows:", grid_df.shape[0])  # Should be len(lat) * len(lon)

Number of rows: 17152


In [8]:
import pandas as pd
import numpy as np
from scipy.spatial import cKDTree

# Load the grid file (which has lat/lon grid points)
grid_df = pd.read_csv('grid_india_0.125.csv')  # Replace with actual file
grid_points = grid_df[['lat', 'lon']].drop_duplicates().values

# Build KDTree from grid points
tree = cKDTree(grid_points)

# Load the point file (fire archive or whatever you're mapping)
point_df = pd.read_csv('fire_points_india.csv')  # Replace with your file
points = point_df[['latitude', 'longitude']].values

# Query the tree for nearest neighbors
distances, indices = tree.query(points)

# Get nearest lat/lon for each point
nearest_points = grid_points[indices]
point_df['nearest_lat'] = nearest_points[:, 0]
point_df['nearest_lon'] = nearest_points[:, 1]

# Optionally, get index values (like grid ID or state info) by merging back with grid_df
point_df = point_df.merge(
    grid_df[['lat', 'lon']],  # Add more columns if needed
    left_on=['nearest_lat', 'nearest_lon'],
    right_on=['lat', 'lon'],
    how='left'
)

# Save output
point_df.to_csv('fire_data_points_only_india.csv', index=False)

print(point_df.head())

   Unnamed: 0  latitude  longitude    firedate  fire  nearest_lat  \
0           0    34.698     74.855  2002-11-01     1        34.75   
1           1    24.810     94.061  2002-11-01     1        24.75   
2           2    24.810     94.061  2002-11-01     1        24.75   
3           3    30.213     76.413  2002-11-01     1        30.25   
4           4    30.249     76.408  2002-11-01     1        30.25   

   nearest_lon    lat    lon  
0        74.75  34.75  74.75  
1        94.00  24.75  94.00  
2        94.00  24.75  94.00  
3        76.50  30.25  76.50  
4        76.50  30.25  76.50  


In [None]:
# Import required libraries
import pandas as pd

# Load CSV data
df = pd.read_csv('fire_data_points_only_india.csv')

# Group by unique latitude, longitude pairs, and acquisition date
grouped = df.groupby(['nearest_lat', 'nearest_lon', 'firedate'])

# Calculate number of rows and sum of "frp" values
result = grouped.agg(
    fire_count=('fire', 'size'),
).reset_index()

# Convert 'firedate' to YYYYMMDD format
result['firedate'] = pd.to_datetime(result['firedate'], format='%d-%m-%Y').dt.strftime('%Y%m%d')

result = result.rename(columns={'firedate': 'Date', 'nearest_lat': 'Lat', 'nearest_lon': 'Lon'})

# Reorder columns to have 'firedate' first
result = result[[ 'Lat', 'Lon', 'Date', 'fire_count']]

# Export results to CSV
result.to_csv('varaibles/firecount.csv', index=False)

# Display the result
result.head()


