In [0]:
# Install the Azure Cosmos DB SDK used by the cells below.
# NOTE(review): no version is pinned, so whatever release is current gets installed.
%pip install azure-cosmos

In [0]:
import pandas as pd
import numpy as np
import scipy
from scipy.stats import linregress
import os

from azure.cosmos import exceptions, CosmosClient, PartitionKey
# Connection settings are read from the environment so that no secret is stored
# in the notebook; os.environ.get returns None for a variable that is not set.
endpoint, key = (
    os.environ.get(variable) for variable in ('COSMOS_ENDPOINT', 'COSMOS_KEY')
)

# The outlet to score: its document id and the city it is partitioned under.
outlet_id, city = "1", "pune"

In [0]:
# Connect to Cosmos DB, resolve the outlet container and load the outlet document.
# NOTE(review): the *_if_not_exists calls create the database / container when
# they are missing - confirm that side effect is wanted in this notebook.
database_name, container_name = 'bhoojal_outlets', 'outlet'
client = CosmosClient(endpoint, key)

database = client.create_database_if_not_exists(id=database_name)

# Outlet documents are partitioned by their city.
container = database.create_container_if_not_exists(
    id=container_name,
    partition_key=PartitionKey(path="/city"),
    offer_throughput=400,
)

# Point-read the outlet by id inside its city partition; the bare name on the
# last line displays the document as the cell output.
outlet_item = container.read_item(item=outlet_id, partition_key=city)
outlet_item

In [0]:
# Build the Cosmos SQL text that selects depth readings whose boundary lies
# within 600 of the outlet (presumably metres, the unit ST_DISTANCE reports - confirm).
container_depth = database.create_container_if_not_exists(
    id="region_depth",
    partition_key=PartitionKey(path="/city"),
    offer_throughput=400,
)

# NOTE(review): leftover manual overrides for the outlet coordinates, kept for reference:
# outlet_item['location']['coordinates'][0] = 73.1117764
# outlet_item['location']['coordinates'][1] = 18.9093286
outlet_coords = outlet_item['location']['coordinates']
outlet_point = '{"type":"Point","coordinates": [%s, %s]}' % (outlet_coords[0], outlet_coords[1])

# The distance expression appears twice: once as a projected column, once as the filter.
distance_query = "ST_DISTANCE(f.boundary, %s)" % outlet_point
depth_query = (
    f"SELECT f.id,f.depth,f.scannedIn,{distance_query} as distance "
    f"FROM f WHERE {distance_query} < 600"
)
depth_query

In [0]:
# Run the depth query across every partition and collect all matching rows.
depth_result = [
    row
    for row in container_depth.query_items(
        query=depth_query,
        enable_cross_partition_query=True,
    )
]
depth_result

In [0]:
# Mean depth per scannedIn value (one row per quarter) for the outlet.
# Only the two columns needed for the aggregation are kept from the query rows.
depth_df = (
    pd.DataFrame(depth_result, columns=['scannedIn', 'depth'])
    .groupby('scannedIn', as_index=False)
    .agg({"depth": "mean"})
)
depth_df

Unnamed: 0,scannedIn,depth
0,2020Q1,5.7
1,2020Q2,4.0
2,2020Q3,6.0
3,2020Q4,6.0


In [0]:
# Build the Cosmos SQL text that selects rain readings logged within range
# (distance < 2000) of the outlet.
# NOTE(review): leftover manual overrides for the outlet coordinates, kept for reference:
# outlet_item['location']['coordinates'][0] = 73.1117764
# outlet_item['location']['coordinates'][1] = 18.9093286
container_rain = database.create_container_if_not_exists(
    id="region_rain",
    partition_key=PartitionKey(path="/city"),
)
distance_query = "ST_DISTANCE(f.boundary, {\"type\":\"Point\",\"coordinates\": [" + str(outlet_item['location']['coordinates'][0]) + ", " + str(outlet_item['location']['coordinates'][1]) + "]})"
# Project f.rain rather than f.depth: the aggregation step builds its frame from
# the 'rain' key of each row, so projecting f.depth left that column entirely NaN.
# Assumes region_rain documents store the reading under `rain` - confirm against the container.
rain_query = "SELECT f.id,f.rain,f.scannedIn,"+ distance_query +" as distance FROM f WHERE " + distance_query + " < 2000"
rain_query

In [0]:
# Run the rain query across every partition and collect all matching rows.
rain_result = [
    row
    for row in container_rain.query_items(
        query=rain_query,
        enable_cross_partition_query=True,
    )
]
rain_result

In [0]:
# Mean rain per scannedIn value (one row per quarter) for the outlet.
# Only the two columns needed for the aggregation are kept from the query rows.
rain_df = (
    pd.DataFrame(rain_result, columns=['scannedIn', 'rain'])
    .groupby('scannedIn', as_index=False)
    .agg({"rain": "mean"})
)
rain_df

In [0]:
# Score the outlet from the recent trends of groundwater depth and rainfall.
TREND_QUARTERS = 4  # how many of the most recent quarters each trend is fitted over


def _normalize_recent(values, quarters=TREND_QUARTERS):
    """Return the last `quarters` non-null values, min-max scaled to [0, 1].

    Args:
        values: per-quarter readings, oldest first.
        quarters: how many trailing readings to keep.

    Returns:
        A float Series indexed 0..n-1. A constant series has no spread to scale
        by, so it maps to all zeros instead of dividing by zero and yielding NaN.

    Raises:
        ValueError: fewer than two usable readings, so no trend can be fitted.
    """
    recent = (
        pd.Series(values, dtype="float64")
        .dropna()
        .tail(quarters)
        .reset_index(drop=True)
    )
    if len(recent) < 2:
        raise ValueError(
            f"need at least 2 non-null readings to fit a trend, got {len(recent)}"
        )
    lowest = recent.min()
    spread = recent.max() - lowest
    if spread == 0:
        return recent * 0.0
    return (recent - lowest) / spread


def _trend_slope(normalized):
    """Least-squares slope of `normalized` against the positions 1..n."""
    # The x axis follows the number of readings actually available rather than
    # a fixed [1, 2, 3, 4], which made linregress fail on any other length.
    positions = np.arange(1, len(normalized) + 1)
    return linregress(positions, normalized)[0]


# Depth and rain for the last quarters, normalized. Rows are taken in frame
# order - assumes depth_df / rain_df are sorted by quarter, oldest first.
depth = _normalize_recent(depth_df['depth'])
rain = _normalize_recent(rain_df['rain'])

# Calculate slope of the Depth metrics.
# Invert depth slope as higher values mean lesser water.
depth_slope = _trend_slope(depth) * -1
print("Depth slope:",depth_slope)

# Calculate slope of the Rain metrics
rain_slope = _trend_slope(rain)
print("Rain slope:",rain_slope)

#Score is summation of both slopes
score = depth_slope + rain_slope
print("Score:",score)

In [0]:
# Persist the computed score on the outlet document.
# A NaN or infinite score (e.g. from missing readings) is not a valid JSON number
# and is meaningless as a ranking value, so stop here instead of writing it.
if not np.isfinite(score):
    raise ValueError(f"Refusing to store non-finite score: {score!r}")
outlet_item['score'] = float(score)  # store a plain Python float, not a numpy scalar
response = container.upsert_item(body=outlet_item)