In [7]:
!pip install tqdm



In [8]:
!pip install geopy --quiet

In [9]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderServiceError
from pathlib import Path
from tqdm import tqdm

In [10]:
# Read the merged dataset from disk, then preview five randomly drawn rows.
df = pd.read_csv(filepath_or_buffer='merged_v2.csv')
df.sample(n=5)

Unnamed: 0,longitude,latitude,Area,Soil group,Land class,Soil type,pH,SOC,Nitrogen,Potassium,Phosphorus,Sulfur,Boron,Zinc,Sand,Silt,Clay
828,4.83939,9.19896,isda,isda,isda,isda,6.73184,0.971236,0.093137,0.129103,9.40027,9.90998,0.139146,0.849528,64.0,21.0,16.0
1704,11.38653,9.14274,isda,isda,isda,isda,5.90001,0.34102,0.040038,0.186272,13.1206,5.37506,0.038333,0.729824,59.0,22.0,19.0
264,89.4,25.58,Pirgacha,Gangachara,Medium high land,loam,4.8,2.41,0.09,0.09,4.0,12.6,0.27,0.95,40.0,40.0,20.0
852,8.61066,10.79627,isda,isda,isda,isda,6.11613,0.981541,0.058463,0.531764,4.65722,7.58282,0.145003,1.49791,51.0,24.0,23.0
1318,9.12501,11.27679,isda,isda,isda,isda,6.4135,0.221519,0.026024,0.028623,2.81334,7.53988,0.019644,0.578916,59.0,21.0,19.0


In [11]:
# Number of rows in the loaded frame.
len(df)

2584

In [30]:
def get_geospacial_details(df: pd.DataFrame) -> list[list[str|float]]:
    location_details = []
    for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing"):
        longitude = str(row.loc["longitude"])
        latitude = str(row.loc["latitude"])
        address = ",".join([latitude, longitude])

        geolocator = Nominatim(user_agent="my_geopy_app")

        try:
            location = geolocator.reverse(address)
            address = location.raw['address']

            # Traverse the data
            city = address.get('city', float("nan"))
            state = address.get('state', float("nan"))
            country = address.get('country', float("nan"))
            location_details.append([city, state, country])


        except GeocoderServiceError as e:
            location_details.append([float("nan"), float("nan"), float("nan")])
    return location_details, state

In [34]:
def get_geo_details(row, geolocator=None):
    """Return the state that contains a row's coordinates.

    Written to be used with ``DataFrame.apply(..., axis=1)``.

    Args:
        row: Row object exposing ``longitude`` and ``latitude`` through
            ``.loc`` (e.g. a pandas Series).
        geolocator: Optional client offering ``reverse(query)``. When omitted
            a Nominatim client is created for this call; pass one in to
            reuse a single client across many rows.

    Returns:
        The ``state`` field of the reverse-geocoded address, NaN when the
        response has no ``state`` field, or the string
        ``"Location not found"`` when the lookup fails or matches nothing.
    """
    not_found = "Location not found"
    query = ",".join([str(row.loc['latitude']), str(row.loc['longitude'])])

    if geolocator is None:
        geolocator = Nominatim(user_agent="my_geopy_app")

    try:
        location = geolocator.reverse(query)
    except GeocoderServiceError:
        return not_found

    # reverse() gives None when nothing matches the coordinates; report it
    # with the same sentinel as a failed request instead of crashing on `.raw`.
    if location is None:
        return not_found

    return location.raw.get('address', {}).get('state', float("nan"))



In [31]:
# Trial run: geocode only the first 20 rows whose Area is the 'isda' placeholder.
isda_preview = df[df['Area'].eq('isda')].iloc[:20]
location_details, state = get_geospacial_details(isda_preview)

Processing: 100%|██████████| 20/20 [00:19<00:00,  1.04it/s]


In [32]:
# Display the state value returned by the 20-row trial run.
state

'Kaduna'

In [37]:
# Overwrite the 'isda' placeholder in Area with the state looked up from each
# row's coordinates (get_geo_details issues one reverse-geocoding call per row).
isda_mask = df['Area'] == 'isda'
df.loc[isda_mask, 'Area'] = df.loc[isda_mask].apply(get_geo_details, axis=1)



In [41]:
# List the distinct Area labels and how many there are.
area_column = df['Area']
print(area_column.unique())
print(f"\nTotal Unique Areas: {area_column.nunique()}")

['Mithpukur' 'Pirgacha ' 'Gangachara' 'Kaunia upazila' 'Taraganj Thana'
 'Bauchi' 'Taraba' 'Plateau' 'Kaduna' 'Nasarawa' 'Niger' 'Kebbi' 'Kano'
 'Kwara' 'Katsina' 'Adamawa']

Total Unique Areas: 16


In [44]:
# Rows per Area, shown as a two-column frame.
area_counts = df['Area'].value_counts()
area_counts.reset_index()

Unnamed: 0,Area,count
0,Niger,561
1,Kaduna,402
2,Plateau,359
3,Bauchi,226
4,Nasarawa,215
5,Mithpukur,186
6,Taraba,135
7,Pirgacha,125
8,Katsina,81
9,Taraganj Thana,79


## Sanity check

In [39]:
# Count of rows whose Area is missing.
df['Area'].isnull().sum()

0

In [42]:
# Shape of the subset whose Area holds the failed-lookup sentinel string.
df[df['Area'].eq('Location not found')].shape

(0, 17)

In [45]:
# Write the updated frame to disk, leaving out the index column.
df.to_csv(path_or_buf='merged_v3.csv', index=False)