In [1]:
import pandas as pd
import re

data = {
    'ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Description': [
        'Forest Conservation Project in Spain',
        'River Cleanup Initiative Project 2021 in Portugal',
        'Urban Gardening Community Project in Germany',
        'Forest Reforestation Project 2022 in France',
        'Wildlife Protection Project Plan in Italy',
        'Endangered Species Conservation Project in Greece',
        'Wetland Restoration Project in Spain',
        'Marine Life Conservation Project in Portugal',
        'Air Quality Improvement Project Initiative in Germany',
        'Habitat Preservation Project for Birds in France'
    ],
    'Date': ['2021-03-15', '2021-06-20', '2022-01-11', '2022-04-05', '2023-02-22', '2023-05-30', '2021-09-13', '2022-07-19', '2023-03-08', '2022-11-21'],
    'Location': ['madrid, spain', 'LISBON, Portugal', 'berlin, germany', 'Paris, France', 'rome, Italy', 'Athens, GREECE', 'Valencia, Spain', 'PORTO, Portugal', 'Munich, Germany', 'Lyon, France'],
    'Budget': ['$20000', '€15000', '€12000', '£18000', '$25000', '€20000', '$17000', '€13000', '€11000', '£16000'],
    'Notes': [
        'Focusing on native forest species in Spain',
        'Cleanup of the Tagus river in Portugal. Endangered species alert!',
        'Community project in urban Berlin, Germany',
        'Reforestation of oak trees in Paris, France',
        'Plan for protecting local wildlife in Italy. Endangered species identified.',
        'Study on the impact on endangered bird species in Greece',
        'Restoration of wetlands in Valencia, Spain',
        'Conservation of marine life in Porto, Portugal',
        'Initiative for improving air quality in Munich, Germany',
        'Preservation of bird habitats in Lyon, France'
    ]
}



environment_df = pd.DataFrame(data)

print(environment_df)

   ID                                        Description        Date  \
0   1               Forest Conservation Project in Spain  2021-03-15   
1   2  River Cleanup Initiative Project 2021 in Portugal  2021-06-20   
2   3       Urban Gardening Community Project in Germany  2022-01-11   
3   4        Forest Reforestation Project 2022 in France  2022-04-05   
4   5          Wildlife Protection Project Plan in Italy  2023-02-22   
5   6  Endangered Species Conservation Project in Greece  2023-05-30   
6   7               Wetland Restoration Project in Spain  2021-09-13   
7   8       Marine Life Conservation Project in Portugal  2022-07-19   
8   9  Air Quality Improvement Project Initiative in ...  2023-03-08   
9  10   Habitat Preservation Project for Birds in France  2022-11-21   

           Location  Budget                                              Notes  
0     madrid, spain  $20000         Focusing on native forest species in Spain  
1  LISBON, Portugal  €15000  Cleanup of the T

In [2]:
# Standardising 'Location'
environment_df['Location'] = environment_df['Location'].apply(lambda x: x.title())

# Extracting 'Year'
environment_df['Year'] = pd.to_datetime(environment_df['Date']).dt.year

print(environment_df)

   ID                                        Description        Date  \
0   1               Forest Conservation Project in Spain  2021-03-15   
1   2  River Cleanup Initiative Project 2021 in Portugal  2021-06-20   
2   3       Urban Gardening Community Project in Germany  2022-01-11   
3   4        Forest Reforestation Project 2022 in France  2022-04-05   
4   5          Wildlife Protection Project Plan in Italy  2023-02-22   
5   6  Endangered Species Conservation Project in Greece  2023-05-30   
6   7               Wetland Restoration Project in Spain  2021-09-13   
7   8       Marine Life Conservation Project in Portugal  2022-07-19   
8   9  Air Quality Improvement Project Initiative in ...  2023-03-08   
9  10   Habitat Preservation Project for Birds in France  2022-11-21   

           Location  Budget  \
0     Madrid, Spain  $20000   
1  Lisbon, Portugal  €15000   
2   Berlin, Germany  €12000   
3     Paris, France  £18000   
4       Rome, Italy  $25000   
5    Athens, Greece  

In [4]:
# Fixed conversion rates
conversion_rates = {'$': 1.0, '€': 1.1, '£': 1.3}  # Example rates: 1 Euro = 1.1 USD, 1 Pound = 1.3 USD

def convert_to_usd(budget_str):
    # Extracting the currency symbol and amount
    currency_symbol = budget_str[0]
    amount = float(budget_str[1:])

    # Converting to USD
    if currency_symbol in conversion_rates:
        return amount * conversion_rates[currency_symbol]
    else:
        return amount

# Converting 'Budget' to numeric USD values
environment_df['Budget_USD'] = environment_df['Budget'].apply(convert_to_usd)

# Calculating total budget for "forest"-related projects in USD
total_budget_forest_usd = environment_df[environment_df['Description'].str.contains("forest", case=False)]['Budget_USD'].sum()
print(total_budget_forest_usd)

43400.0


In [5]:
# Using regex to identify mentions of endangered species
environment_df['Endangered_species'] = environment_df['Notes'].str.contains(r'endangered species', flags=re.IGNORECASE).map({True: 'Yes', False: 'No'})

print(environment_df)

   ID                                        Description        Date  \
0   1               Forest Conservation Project in Spain  2021-03-15   
1   2  River Cleanup Initiative Project 2021 in Portugal  2021-06-20   
2   3       Urban Gardening Community Project in Germany  2022-01-11   
3   4        Forest Reforestation Project 2022 in France  2022-04-05   
4   5          Wildlife Protection Project Plan in Italy  2023-02-22   
5   6  Endangered Species Conservation Project in Greece  2023-05-30   
6   7               Wetland Restoration Project in Spain  2021-09-13   
7   8       Marine Life Conservation Project in Portugal  2022-07-19   
8   9  Air Quality Improvement Project Initiative in ...  2023-03-08   
9  10   Habitat Preservation Project for Birds in France  2022-11-21   

           Location  Budget  \
0     Madrid, Spain  $20000   
1  Lisbon, Portugal  €15000   
2   Berlin, Germany  €12000   
3     Paris, France  £18000   
4       Rome, Italy  $25000   
5    Athens, Greece  

In [6]:
# Extract 'Country' from 'Location'
environment_df['Country'] = environment_df['Location'].apply(lambda x: x.split(', ')[-1])

# Extract 'Project Type' from 'Description'
environment_df['Project_Type'] = environment_df['Description'].str.extract(r'(\b\w+\b) Project')[0]

# Generate the report
report = environment_df.groupby('Country').agg(
    Total_Projects=('ID', 'count'),
    Average_Budget=('Budget_USD', 'mean')
)

# Identify top three most common project types
top_project_types = environment_df['Project_Type'].value_counts().nlargest(3).index.tolist()
report['Top_Project_Types'] = ', '.join(top_project_types)

print(report)


          Total_Projects  Average_Budget                    Top_Project_Types
Country                                                                      
France                 2         22100.0  Conservation, Initiative, Community
Germany                2         12650.0  Conservation, Initiative, Community
Greece                 1         22000.0  Conservation, Initiative, Community
Italy                  1         25000.0  Conservation, Initiative, Community
Portugal               2         15400.0  Conservation, Initiative, Community
Spain                  2         18500.0  Conservation, Initiative, Community
