In [44]:
import re

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [45]:
# Load the level-2 administrative boundaries of France from the GADM 3.6
# shapefile and preview the first rows.
regions_level2 = gpd.read_file(
    filename="../france_boundary/gadm36_FRA_2.shp",
)
regions_level2.head(5)

Unnamed: 0,GID_0,NAME_0,GID_1,NAME_1,NL_NAME_1,GID_2,NAME_2,VARNAME_2,NL_NAME_2,TYPE_2,ENGTYPE_2,CC_2,HASC_2,geometry
0,FRA,France,FRA.1_1,Auvergne-Rhône-Alpes,,FRA.1.1_1,Ain,,,Département,Department,1,FR.AI,"POLYGON ((5.25563 45.78431, 5.24020 45.77737, ..."
1,FRA,France,FRA.1_1,Auvergne-Rhône-Alpes,,FRA.1.2_1,Allier,Basses-Alpes,,Département,Department,3,FR.AL,"POLYGON ((2.38608 46.33197, 2.37061 46.31272, ..."
2,FRA,France,FRA.1_1,Auvergne-Rhône-Alpes,,FRA.1.3_1,Ardèche,,,Département,Department,7,FR.AH,"POLYGON ((4.23405 44.27506, 4.21638 44.28904, ..."
3,FRA,France,FRA.1_1,Auvergne-Rhône-Alpes,,FRA.1.4_1,Cantal,,,Département,Department,15,FR.CL,"POLYGON ((2.85493 44.87431, 2.85140 44.87195, ..."
4,FRA,France,FRA.1_1,Auvergne-Rhône-Alpes,,FRA.1.5_1,Drôme,,,Département,Department,26,FR.DM,"POLYGON ((4.81214 44.24206, 4.81251 44.25802, ..."


In [46]:
# Keep only the two name columns and the geometry. In this level-2 layer
# NAME_1 is the parent région (called 'state' from here on) and NAME_2 is the
# département (called 'region').
#
# rename() ignores labels that are not present, so this cell can be re-run
# after the columns were already renamed without raising a KeyError.
regions_level2 = regions_level2.rename(
    columns={'NAME_1': 'state', 'NAME_2': 'region'}
)[['state', 'region', 'geometry']]
regions_level2.head()


Unnamed: 0,state,region,geometry
0,Auvergne-Rhône-Alpes,Ain,"POLYGON ((5.25563 45.78431, 5.24020 45.77737, ..."
1,Auvergne-Rhône-Alpes,Allier,"POLYGON ((2.38608 46.33197, 2.37061 46.31272, ..."
2,Auvergne-Rhône-Alpes,Ardèche,"POLYGON ((4.23405 44.27506, 4.21638 44.28904, ..."
3,Auvergne-Rhône-Alpes,Cantal,"POLYGON ((2.85493 44.87431, 2.85140 44.87195, ..."
4,Auvergne-Rhône-Alpes,Drôme,"POLYGON ((4.81214 44.24206, 4.81251 44.25802, ..."


In [47]:
# Collect, for each state, the list of its region names; the result has one
# row per state with columns 'state' and 'region' (a Python list).
states = (
    regions_level2
    .groupby('state')['region']
    .agg(list)
    .reset_index()
)
states

Unnamed: 0,state,region
0,Auvergne-Rhône-Alpes,"[Ain, Allier, Ardèche, Cantal, Drôme, Haute-Lo..."
1,Bourgogne-Franche-Comté,"[Côte-d'Or, Doubs, Haute-Saône, Jura, Nièvre, ..."
2,Bretagne,"[Côtes-d'Armor, Finistère, Ille-et-Vilaine, Mo..."
3,Centre-Val de Loire,"[Cher, Eure-et-Loir, Indre, Indre-et-Loire, Lo..."
4,Corse,"[Corse-du-Sud, Haute-Corse]"
5,Grand Est,"[Ardennes, Aube, Bas-Rhin, Haut-Rhin, Haute-Ma..."
6,Hauts-de-France,"[Aisne, Nord, Oise, Pas-de-Calais, Somme]"
7,Normandie,"[Calvados, Eure, Manche, Orne, Seine-Maritime]"
8,Nouvelle-Aquitaine,"[Charente, Charente-Maritime, Corrèze, Creuse,..."
9,Occitanie,"[Ariège, Aude, Aveyron, Gard, Gers, Haute-Garo..."


In [48]:
# Build one search query per (state, farm type, region) combination.
farm_types = [
    "dairy farms",
    "poultry farms",
    "cattle farms",
    "livestock farms",
    "pig farms",
    "fish farms",
    "aquaculture farms",
    "egg farmers",
    "chicken hatchery",
    "shrimp farms",
    "seafood farms",
    "beef farms",
    "meat producer"
]

# Each row of `states` already pairs a state with its list of regions, so walk
# the two columns together instead of re-filtering the frame for every farm
# type. Output order is unchanged: state, then farm type, then region.
queries = [
    f'{farm_type} in {region}, {state}'
    for state, regions in zip(states['state'], states['region'])
    for farm_type in farm_types
    for region in regions
]
            




In [49]:
# Put the generated queries into a one-column DataFrame.
queries_df = pd.DataFrame(queries, columns=['query'])

# Tag every query with its state: the second piece obtained when the query
# text is split on ', '.
queries_df2 = queries_df.assign(
    state=queries_df['query'].map(lambda text: text.split(', ')[1])
)

# Persist the full table, then list the distinct states as a sanity check.
queries_df2.to_csv('farm_queries.csv', index=False)
queries_df2['state'].unique()

array(['Auvergne-Rhône-Alpes', 'Bourgogne-Franche-Comté', 'Bretagne',
       'Centre-Val de Loire', 'Corse', 'Grand Est', 'Hauts-de-France',
       'Normandie', 'Nouvelle-Aquitaine', 'Occitanie', 'Pays de la Loire',
       "Provence-Alpes-Côte d'Azur", 'Île-de-France'], dtype=object)

In [50]:
# Write one CSV per state, keeping both the 'query' and 'state' columns.
# NOTE: the following cell writes to the same file names without the 'state'
# column, so on a top-to-bottom run these files are overwritten.

# Take the states from the data (in order of first appearance) rather than
# from a pasted copy of the previous cell's output, so the list cannot go
# stale when the boundary file or the query generation changes.
list_states = queries_df2['state'].unique().tolist()

queries_df3 = queries_df2.copy()

for state in list_states:
    # Every character outside [a-zA-Z0-9] becomes '_'; this includes accented
    # letters, e.g. 'Île-de-France' -> '_le_de_France'.
    state_file_name = re.sub(r'[^a-zA-Z0-9]', '_', state)

    # Keep only this state's rows and save them.
    state_data = queries_df3[queries_df3['state'] == state]
    state_data.to_csv(f'farm_queries_{state_file_name}.csv', index=False)

In [51]:
# Write one CSV per state containing only the 'query' column.

# Take the states from the data (in order of first appearance) rather than
# from a pasted copy of an earlier cell's output, so the list cannot go stale
# when the boundary file or the query generation changes.
list_states = queries_df2['state'].unique().tolist()

queries_df3 = queries_df2.copy()

for state in list_states:
    # Every character outside [a-zA-Z0-9] becomes '_'; this includes accented
    # letters, e.g. 'Île-de-France' -> '_le_de_France'.
    state_file_name = re.sub(r'[^a-zA-Z0-9]', '_', state)

    # Keep only this state's rows.
    state_data = queries_df3[queries_df3['state'] == state]

    # The state is already encoded in the file name, so drop the column.
    state_data.drop(columns=['state']).to_csv(f'farm_queries_{state_file_name}.csv', index=False)


In [53]:
# Reload the saved queries and turn the 'query' column into a plain list of
# strings for the web scraper.
df = pd.read_csv('farm_queries.csv')

# A column-wise conversion replaces the row-by-row iterrows() loop;
# astype(str) mirrors the f-string formatting the loop applied to each value.
queries4 = df['query'].astype(str).tolist()

queries4

['dairy farms in Essonne, Île-de-France',
 'dairy farms in Hauts-de-Seine, Île-de-France',
 'dairy farms in Paris, Île-de-France',
 'dairy farms in Seine-et-Marne, Île-de-France',
 'dairy farms in Seine-Saint-Denis, Île-de-France',
 "dairy farms in Val-d'Oise, Île-de-France",
 'dairy farms in Val-de-Marne, Île-de-France',
 'dairy farms in Yvelines, Île-de-France',
 'poultry farms in Essonne, Île-de-France',
 'poultry farms in Hauts-de-Seine, Île-de-France',
 'poultry farms in Paris, Île-de-France',
 'poultry farms in Seine-et-Marne, Île-de-France',
 'poultry farms in Seine-Saint-Denis, Île-de-France',
 "poultry farms in Val-d'Oise, Île-de-France",
 'poultry farms in Val-de-Marne, Île-de-France',
 'poultry farms in Yvelines, Île-de-France',
 'cattle farms in Essonne, Île-de-France',
 'cattle farms in Hauts-de-Seine, Île-de-France',
 'cattle farms in Paris, Île-de-France',
 'cattle farms in Seine-et-Marne, Île-de-France',
 'cattle farms in Seine-Saint-Denis, Île-de-France',
 "cattle farm