In [None]:
import pandas as pd
import math

In [None]:
# Remove columns that are not needed
dftrees = pd.read_csv('TreesPortugueseTerritory.csv', sep='\t', usecols=['scientificName', 'locality', 'stateProvince','occurrenceStatus','individualCount','decimalLatitude', 'decimalLongitude', 'coordinateUncertaintyInMeters','coordinatePrecision','elevation','elevationAccuracy','depth','depthAccuracy'])
dftrees.to_csv('TreesPortugueseTerritoryDropped.csv', index=False)

In [None]:
dfTreesDRP = pd.read_csv('TreesPortugueseTerritoryDropped.csv')
print(dfTreesDRP['occurrenceStatus'].unique())
print(dfTreesDRP['individualCount'].unique())
print(dfTreesDRP['coordinatePrecision'].unique())
print(dfTreesDRP['elevation'].unique())

In [None]:
dfTreesDRP['stateProvince'] = dfTreesDRP['stateProvince'].replace('Bragança District', 'Bragança')
dfTreesDRP['locality'] = dfTreesDRP['locality'].replace('Ovadas e Panchora', 'Ovadas e Panchorra')

dfTreesDRP.to_csv('TreesPortugueseTerritoryDropped.csv', index=False)

In [68]:
import math

def DistanceTwoPoints(lat1, lon1, lat2, lon2):
    R = 6371e3  # Radius of the Earth in meters
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    a = math.sin(delta_phi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return R * c  # Distance in meters


def check_locality(row):
    if row['scientificNames'] != '':
        return row['scientificNames']

    parish = str(row['parish']).lower()
    district = str(row['district']).lower()
    unique_names = dfTreesDRP[(dfTreesDRP['locality'].str.lower().contains(parish, na=False)) & 
                              (dfTreesDRP['stateProvince'] == district)]['scientificName'].unique()
    return '; '.join(unique_names)


def check_locality2(row):
    if row['scientificNames'] != '':
        return row['scientificNames']
    
    concelho = str(row['municipality']).lower()
    district = str(row['district']).lower()
    unique_names = dfTreesDRP[(dfTreesDRP['locality'].str.lower().contains(concelho, na=False)) & 
                              (dfTreesDRP['stateProvince'].str.lower() == district)]['scientificName'].unique()
    return '; '.join(unique_names)



def check_district(row, precision=120):
    if row['scientificNames'] != '':
        return row['scientificNames']
    
    district = str(row['district']).lower()
    lat1 = row['latitude']
    lon1 = row['longitude']
    
    # Filter dfTreesDRP based on 'locality' and 'stateProvince'
    filtered_df = dfTreesDRP[(dfTreesDRP['stateProvince'].str.lower() == district)]
    
    # Calculate the distance for each row in the filtered DataFrame
    filtered_df['distance'] = filtered_df.apply(lambda x: DistanceTwoPoints(lat1, lon1, x['decimalLatitude'], x['decimalLongitude']), axis=1)
    
    # Filter the DataFrame based on the distance
    close_points_df = filtered_df[filtered_df['distance'] < precision]
    
    # Get the unique 'scientificName' values
    unique_names = close_points_df['scientificName'].unique()
    
    return '; '.join(unique_names)




def check_districtCoordSingular(row, precision=500):
    if row['scientificNames'] != '':
        return [(row['scientificNames'], 0)]
    
    district = str(row['district']).lower()
    lat1 = row['latitude']
    lon1 = row['longitude']
    
    # Filter dfTreesDRP based on 'locality' and 'stateProvince'
    filtered_df = dfTreesDRP[(dfTreesDRP['stateProvince'].str.lower() == district)]
    
    # Calculate the distance for each row in the filtered DataFrame
    filtered_df['distance'] = filtered_df.apply(lambda x: DistanceTwoPoints(lat1, lon1, x['decimalLatitude'], x['decimalLongitude']), axis=1)
    
    # Filter the DataFrame based on the distance
    close_points_df = filtered_df[filtered_df['distance'] < precision]
    
    # If there are no close points, return an empty list
    if close_points_df.empty:
        return []
    
    # Sort the DataFrame by the 'distance' column
    close_points_df = close_points_df.sort_values('distance')
    
    # Get the 'scientificName' and 'distance' of the 5 closest points
    closest_points = close_points_df.iloc[:5][['scientificName', 'distance']].apply(tuple, axis=1).tolist()
    
    return closest_points


def check_districtCoord(row, precision=500):
    if row['scientificNames'] != '':
        return row['scientificNames']
    
    district = str(row['district']).lower()
    lat1 = row['latitude']
    lon1 = row['longitude']
    
    # Filter dfTreesDRP based on 'locality' and 'stateProvince'
    filtered_df = dfTreesDRP[(dfTreesDRP['stateProvince'].str.lower() == district)]
    
    # Calculate the distance for each row in the filtered DataFrame
    filtered_df['distance'] = filtered_df.apply(lambda x: DistanceTwoPoints(lat1, lon1, x['decimalLatitude'], x['decimalLongitude']), axis=1)
    
    # Filter the DataFrame based on the distance
    close_points_df = filtered_df[filtered_df['distance'] < precision]
    
    # If there are no close points, return an empty string
    if close_points_df.empty:
        return ''
    
    # Sort the DataFrame by the 'distance' column
    close_points_df = close_points_df.sort_values('distance')
    
    # Get the unique 'scientificName' values of the 5 closest points
    unique_names = close_points_df.iloc[:5]['scientificName'].unique()
    
    return '; '.join(unique_names)


In [None]:
DistanceTwoPoints(51.5007, 0.1246, 40.6892, 74.0445)

In [None]:
#Faro,Olhão,Moncarapacho

original_lat, original_lon = 37.0787222226461, -7.8068333334393 

#decimalLatitude,decimalLongitude

dfTreesDRP['distance'] = dfTreesDRP.apply(lambda row: DistanceTwoPoints(original_lat, original_lon, row['decimalLatitude'], row['decimalLongitude']), axis=1)

dfNearby = dfTreesDRP[dfTreesDRP['distance'] <= 500]

In [None]:
print(dfNearby['scientificName'].unique())
print(dfNearby.head())
#search for locality

In [45]:
_year = 2023
dfFires = pd.read_csv(f"Dataset/b{_year}.csv")
print(len(dfFires))

#dfFires['scientificNames'] = dfFires['parish'].apply(check_locality)

#print(dfFires)

dfFires['scientificNames'] = dfFires.apply(check_locality, axis=1)

dfFires.to_csv(f"v2ParishTreeSpeciesb{_year}.csv", index=False)




2499


  unique_names = dfTreesDRP[(dfTreesDRP['locality'].str.contains(parish, na=False)) &


In [47]:
secrow = dfFires.iloc[2]
print(secrow['scientificNames'])

Tracheophyta; Pinus pinaster Aiton; Vitis vinifera L.; Eucalyptus L'Hér.; Castanea sativa Mill.; Magnoliopsida


In [50]:
empty_count = (dfFires['scientificNames'] == '').sum()
print(empty_count)

non_empty_count = (dfFires['scientificNames'] != '').sum()
print(non_empty_count)

1946
553


In [53]:
print(dfFires.head())

   year        date district        municipality          parish  \
0  2023  2023-01-21     Faro        Castro Marim        Odeleite   
1  2023  2023-01-27    Porto             Lousada          Casais   
2  2023  2023-01-27    Porto  Marco de Canaveses           Sande   
3  2023  2023-01-28   Aveiro              Anadia           Moita   
4  2023  2023-01-29     Beja          Vidigueira  Vila de Frades   

                  local   latitude  longitude         cause  elevation  ...  \
0   IC27 (EN122) Km 106  37.320278  -7.486111  Desconhecida       83.0  ...   
1    Lameirão (Covilhã)  41.267195  -8.315838  Desconhecida      230.0  ...   
2     Lg. Mexide (Agro)  41.115955  -8.185733  Desconhecida      405.0  ...   
3             Junqueira  40.432500  -8.367778  Desconhecida      224.0  ...   
4  Quinta das Choupanas  38.208889  -7.817500  Desconhecida      193.0  ...   

  hourly.direct_normal_irradiance_instant  \
0                                   571.7   
1                         

In [55]:
dfFires['scientificNames'] = dfFires.apply(check_locality2, axis=1)
empty_count = (dfFires['scientificNames'] == '').sum()
print(empty_count)

non_empty_count = (dfFires['scientificNames'] != '').sum()
print(non_empty_count)

1761
738


In [60]:
dfFires['scientificNames'] = dfFires.apply(check_district, axis=1)
empty_count = (dfFires['scientificNames'] == '').sum()
print(empty_count)

non_empty_count = (dfFires['scientificNames'] != '').sum()
print(non_empty_count)

1481
1018


In [61]:
dfFires.to_csv(f"v3ParishTreeSpeciesb{_year}.csv", index=False)

In [66]:
vazio = dfFires.iloc[1]
print(vazio['scientificNames'])
check_districtCoordSingular(vazio, precision=300)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['distance'] = filtered_df.apply(lambda x: DistanceTwoPoints(lat1, lon1, x['decimalLatitude'], x['decimalLongitude']), axis=1)


[('Tracheophyta', 188.554610087213), ('Tracheophyta', 188.554610087213)]

In [69]:
dfFires['scientificNames'] = dfFires.apply(check_districtCoord, axis=1)
empty_count = (dfFires['scientificNames'] == '').sum()
print(empty_count)

non_empty_count = (dfFires['scientificNames'] != '').sum()
print(non_empty_count)