<a href="https://colab.research.google.com/github/sonleh96/wb-gpbp-ldt/blob/dev-ghinwa/Total_Emissions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [48]:
#Libraries Needed
import geopandas as gpd
import pandas as pd
from google.cloud import storage
from io import StringIO

from shapely.geometry import Point

In [1]:
from google.colab import auth
auth.authenticate_user()

In [49]:
# Create a client
client = storage.Client()

# Access the Google Cloud Storage bucket
bucket_name = 'wb-ldt'
bucket = client.get_bucket(bucket_name)

# Define the folder path in the bucket (ensure it ends with '/')
folder_path = 'RS/raw-data/geospatial/climate-trace-emissions/filtered_emissions_data/'

**Download Emissions Data**

In [40]:
files = list()

blobs = bucket.list_blobs(prefix=folder_path)
for i, blob in enumerate(blobs):
  if i == 0:
    continue  # Skip iteration when i == 1

  else:
    files.append(blob.name)

In [52]:
# List and read files into DataFrames
blobs = bucket.list_blobs(prefix=folder_path)

# Initialize an empty list to hold DataFrames
df_list = []

# Iterate through the blobs
for i, blob in enumerate(blobs):
    # Skip the first file, if needed
    if i == 0:
        continue
    elif i == max_files - 3:
        break
    else:
        # Read the content of the blob (assuming it's a CSV file)
        blob_content = blob.download_as_text()  # Download content as a string

        # Convert the content into a DataFrame
        df = pd.read_csv(StringIO(blob_content))

        # Append the DataFrame to the list
        df_list.append(df)
        print(f"Read {blob.name} into a DataFrame")

# Concatenate all DataFrames into a single DataFrame
final_df = pd.concat(df_list, ignore_index=True)
final_df_cleaned = final_df.drop_duplicates()

Read RS/raw-data/geospatial/climate-trace-emissions/filtered_emissions_data/cement_emissions_sources.csv into a DataFrame
Read RS/raw-data/geospatial/climate-trace-emissions/filtered_emissions_data/coal-mining_emissions_sources.csv into a DataFrame
Read RS/raw-data/geospatial/climate-trace-emissions/filtered_emissions_data/copper-mining_emissions_sources.csv into a DataFrame
Read RS/raw-data/geospatial/climate-trace-emissions/filtered_emissions_data/cropland-fires_emissions_sources.csv into a DataFrame
Read RS/raw-data/geospatial/climate-trace-emissions/filtered_emissions_data/domestic-aviation_emissions_sources.csv into a DataFrame
Read RS/raw-data/geospatial/climate-trace-emissions/filtered_emissions_data/domestic-shipping_emissions_sources.csv into a DataFrame
Read RS/raw-data/geospatial/climate-trace-emissions/filtered_emissions_data/electricity-generation_emissions_sources.csv into a DataFrame
Read RS/raw-data/geospatial/climate-trace-emissions/filtered_emissions_data/enteric-ferm

In [62]:
final_df = final_df[['source_id', 'source_name', 'source_type', 'sector', 'subsector', 'lat', 'lon', 'geometry_ref', 'gas', 'emissions_quantity', 'emissions_factor', 'emissions_factor_units']]

In [69]:
# Convert the DataFrame into a GeoDataFrame using lat and lon columns
final_df['geometry'] = final_df.apply(lambda row: Point((row['lon'], row['lat'])), axis=1)

# Define the GeoDataFrame with a coordinate reference system (CRS) such as WGS84 (EPSG:4326)
final_df = gpd.GeoDataFrame(final_df, geometry='geometry', crs="EPSG:4326")

**Serbia Shape File**

In [70]:
#Name of Shape File
file_path = "shapefiles/gadm41_SRB_2.json"
gcs_file_path = 'gs://' + bucket_name + '/' + file_path

#Read Shape File --> The shape file gives a MultiPolygon Geometry Column
gdf = gpd.read_file(gcs_file_path)

#Adjust for GeoSpatial Data
center = gpd.GeoDataFrame(gdf[['GID_2', 'NAME_2']])

#Change the MultiPolygon Geometry Column to make it more useful
center['geometry'] = gdf.centroid
center = center.to_crs(gdf.crs)
center['lat'] = center.geometry.y
center['lon'] = center.geometry.x


  center['geometry'] = gdf.centroid
  center['geometry'] = gdf.centroid


In [71]:
emissions = gpd.sjoin(final_df, gdf, predicate = 'within',
                      how = 'inner')

In [73]:
emissions.head()

Unnamed: 0,source_id,source_name,source_type,sector,subsector,lat,lon,geometry_ref,gas,emissions_quantity,...,GID_1,NAME_1,NL_NAME_1,NAME_2,VARNAME_2,NL_NAME_2,TYPE_2,ENGTYPE_2,CC_2,HASC_2
0,18657080,Beočin cement plant,Dry,manufacturing,cement,45.207937,19.707231,trace_19.707231_45.207937,ch4,,...,SRB.5_1,Južno-Bački,Јужнобачки,Beočin,,Беочин,Opštine,Town|Municipal,,
1,18657080,Beočin cement plant,Dry,manufacturing,cement,45.207937,19.707231,trace_19.707231_45.207937,co2,0.0,...,SRB.5_1,Južno-Bački,Јужнобачки,Beočin,,Беочин,Opštine,Town|Municipal,,
2,18657080,Beočin cement plant,Dry,manufacturing,cement,45.207937,19.707231,trace_19.707231_45.207937,co2e_100yr,0.0,...,SRB.5_1,Južno-Bački,Јужнобачки,Beočin,,Беочин,Opštine,Town|Municipal,,
3,18657080,Beočin cement plant,Dry,manufacturing,cement,45.207937,19.707231,trace_19.707231_45.207937,co2e_20yr,0.0,...,SRB.5_1,Južno-Bački,Јужнобачки,Beočin,,Беочин,Opštine,Town|Municipal,,
4,18657080,Beočin cement plant,Dry,manufacturing,cement,45.207937,19.707231,trace_19.707231_45.207937,n2o,,...,SRB.5_1,Južno-Bački,Јужнобачки,Beočin,,Беочин,Opštine,Town|Municipal,,


In [77]:
grouped_emissions = emissions.groupby(['GID_2', 'NAME_2', 'gas']).agg({
    'emissions_quantity': 'sum',        # Sum of the 'Sales' column
    'emissions_factor': 'mean'     # Average of the 'Quantity' column
})

In [79]:
grouped_emissions = grouped_emissions.reset_index()
#len(grouped_emissions)

In [82]:
#Save the HealthCare Facilities in Serbia CSV file
from io import BytesIO

# Convert DataFrame to CSV in memory
csv_buffer = BytesIO()
grouped_emissions.to_csv(csv_buffer, index=False)

# Move the buffer's position to the beginning
csv_buffer.seek(0)

def upload_csv_to_gcs(bucket_name, destination_blob_name, file_buffer):
    """Uploads a file from a buffer to Google Cloud Storage."""
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # Upload the file-like object (buffer)
    blob.upload_from_file(file_buffer, content_type='text/csv')
    print(f"File uploaded to {destination_blob_name}.")

# Define your GCS bucket name and destination path
destination_blob_name = 'RS/processed-data/SRB_emissions-all-sources.csv'

# Upload the CSV from the buffer directly
upload_csv_to_gcs(bucket_name, destination_blob_name, csv_buffer)

File uploaded to RS/processed-data/SRB_emissions-all-sources.csv.


In [83]:
grouped_emissions = emissions.groupby(['GID_2', 'NAME_2', 'gas', 'sector']).agg({
    'emissions_quantity': 'sum',        # Sum of the 'Sales' column
    'emissions_factor': 'mean'     # Average of the 'Quantity' column
})

In [85]:
grouped_emissions = grouped_emissions.reset_index()

In [89]:
#Save the HealthCare Facilities in Serbia CSV file
from io import BytesIO

# Convert DataFrame to CSV in memory
csv_buffer = BytesIO()
grouped_emissions.to_csv(csv_buffer, index=False)

# Move the buffer's position to the beginning
csv_buffer.seek(0)

def upload_csv_to_gcs(bucket_name, destination_blob_name, file_buffer):
    """Uploads a file from a buffer to Google Cloud Storage."""
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # Upload the file-like object (buffer)
    blob.upload_from_file(file_buffer, content_type='text/csv')
    print(f"File uploaded to {destination_blob_name}.")

# Define your GCS bucket name and destination path
destination_blob_name = 'RS/processed-data/SRB_emissions-per-sector-all-sources.csv'

# Upload the CSV from the buffer directly
upload_csv_to_gcs(bucket_name, destination_blob_name, csv_buffer)

File uploaded to RS/processed-data/SRB_emissions-per-sector-all-sources.csv.
