<a href="https://colab.research.google.com/github/stevejj4/Data-Cleaning/blob/main/Stephen_Juma_Assessment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import json
from google.colab import auth
import gspread
from google.auth import default

In [40]:
# Authorizing Google Colab to access Google Sheets
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

# Importing data from Google Sheets
worksheet = gc.open('Data').sheet1
rows = worksheet.get_all_values()
df = pd.DataFrame(rows)
df.columns = df.iloc[0]  # Setting the first row as column names
df = df.iloc[1:]  # Removing the header row


In [41]:
# Parsing the geometry column to create Polygon objects
def parse_polygon(polygon_str):
    try:
        polygon_str = polygon_str.replace('POLYGON ((', '').replace('))', '')
        points = polygon_str.split(', ')
        points = [tuple(map(float, point.split())) for point in points]
        return Polygon(points)
    except Exception as e:
        print(f"Error parsing polygon: {e}")
        return None

df['geometry'] = df['geometry'].apply(parse_polygon)

In [42]:
# Creating a GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry='geometry')

In [43]:
# Parsing the Farm Polygon column to create Polygon objects
def parse_farm_polygon(farm_polygon_str):
    try:
        farm_polygon = eval(farm_polygon_str)  # Converting string representation to a list of dictionaries
        points = [(coord['longitude'], coord['latitude']) for coord in farm_polygon]
        return Polygon(points)
    except Exception as e:
        print(f"Error parsing farm polygon: {e}")
        return None

gdf['Farm Polygon'] = gdf['Farm Polygon'].apply(parse_farm_polygon)


In [44]:
# Aligning farm polygons to match the boundaries of the coffee farms
aligned_polygons = []

for index, row in gdf.iterrows():
    farm_polygon = row['Farm Polygon']
    coffee_polygon = row['geometry']

    if not farm_polygon or not coffee_polygon:
        print(f"Invalid polygon data at index {index}")
        aligned_polygons.append(None)
        continue

    # Checking intersection (if needed)
    if not farm_polygon.intersects(coffee_polygon):
        print(f"No intersection for polygon at index {index}")
        aligned_polygons.append(farm_polygon)
        continue

    # Aligning the polygons (this can involve more complex operations based on specific needs)
    aligned_polygon = farm_polygon.intersection(coffee_polygon)
    aligned_polygons.append(aligned_polygon)

# Adding the aligned polygons back to the GeoDataFrame
gdf['Aligned Polygon'] = aligned_polygons


In [45]:
# Converting Polygon objects to WKT (Well-Known Text) format for JSON serialization
gdf['geometry'] = gdf['geometry'].apply(lambda x: x.wkt if x else None)
gdf['Farm Polygon'] = gdf['Farm Polygon'].apply(lambda x: x.wkt if x else None)
gdf['Aligned Polygon'] = gdf['Aligned Polygon'].apply(lambda x: x.wkt if x else None)


  gdf['geometry'] = gdf['geometry'].apply(lambda x: x.wkt if x else None)


{'spreadsheetId': '1NSYNly_NOj6EIcyrZDnKq14e9BXQNh9RM0n5FrHT3wE',
 'updatedRange': "'Aligned Farm Data'!A1:D7",
 'updatedRows': 7,
 'updatedColumns': 4,
 'updatedCells': 28}

In [None]:
# Checking if the 'Aligned Farm Data' worksheet exists
spreadsheet = gc.open('Data')
worksheet_titles = [ws.title for ws in spreadsheet.worksheets()]

if 'Aligned Farm Data' in worksheet_titles:
    # Load the existing worksheet
    aligned_worksheet = spreadsheet.worksheet('Aligned Farm Data')
else:
    # Create a new worksheet
    aligned_worksheet = spreadsheet.add_worksheet(title='Aligned Farm Data', rows=gdf.shape[0] + 1, cols=gdf.shape[1])

# Updating the worksheet with the new data
aligned_worksheet.update([gdf.columns.values.tolist()] + gdf.values.tolist())


In [47]:
gdf.shape

(6, 4)

In [48]:
gdf.head(6)

Unnamed: 0,boxes_pula_id,geometry,Farm Polygon,Aligned Polygon
1,UG_Kyagalanyi Coffee_ANEE0GM,POLYGON ((30.445659644901838 -0.74046889964000...,POLYGON ((30.445659644901752 -0.74046889964002...,POLYGON ((30.445804819464684 -0.74023087346251...
2,UG_Kyagalanyi Coffee_06TOH25,POLYGON ((30.44573608785877 -0.740643563911432...,POLYGON ((30.445736087858673 -0.74064356391145...,POLYGON ((30.445896685123444 -0.74040352625452...
3,UG_Kyagalanyi Coffee_M5MU19T,POLYGON ((29.765873551369012 -0.83606617507015...,"POLYGON ((29.7658735513689 -0.836066175070194,...","POLYGON ((29.766283258796 -0.835944818031792, ..."
4,UG_Kyagalanyi Coffee_QOZ3JZL,POLYGON ((29.766268506646508 -0.83518348691248...,"POLYGON ((29.7662685066464 -0.83518348691252, ...","POLYGON ((29.766428768635 -0.835476822345826, ..."
5,UG_Kyagalanyi Coffee_ODD7TFK,"POLYGON ((31.13832738250501 0.558417353114317,...",POLYGON ((31.13832738250494 0.5584173531143315...,POLYGON ((31.13817416131503 0.5584163473337335...
6,UG_Kyagalanyi Coffee_Y57I7ZW,POLYGON ((31.138238199055277 0.558602081483414...,POLYGON ((31.138238199055195 0.558602081483428...,POLYGON ((31.138075254857604 0.558591353157248...
