# Welcome To Project B1 Maps💦
Here we present our maps. If you would like to see the other charts, visit the project's GitHub page:
https://github.com/TimurNizamov/DrinkingWaterQualityPrediction

## Maps description
1. Map with all the markers that we obtained using the OpenStreetMap API.
2. Map with two kinds of markers, which indicate the test results from 2007 to 2022. If a marker is red, the health department detected problems with the water at that place at some point in this time range.
3. Map with bad tests. On this map you can hover over or click on a marker to get additional information, such as the years in which problems occurred and all the compliance problems.

⚠️ Some places are not located correctly. We use the OpenStreetMap API to find each place's latitude and longitude by its name in order to create these amazing maps, which you can enjoy right now. Unfortunately, because of inconsistent naming, some places are not displayed correctly. ⚠️

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, Polygon, box, LineString
import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Geocoded places; the cells below read its `place`, `lat` and `lon` columns.
geo = pd.read_csv('./csv/geo_data.csv')

# Water test records.
df = pd.read_csv('./csv/water_tests.csv')
# drop_duplicates() returns a new frame, so the result must be re-assigned;
# calling it without assignment leaves the duplicates in place.
df = df.drop_duplicates()

# The tester columns are not used by any of the maps.
df = df.drop(columns=['testers_job_title', 'testers_name'])

# Coordinate window applied by validate_lon / validate_lat further down:
# lon 21 - 28.5
# lat 57 - 60

# 1. Places where water was checked

In [2]:
def validate_lon(lon):
    """Return ``lon`` when it lies strictly between 21 and 28.5, else NaN.

    NaN input also yields NaN because both comparisons are false for it.
    """
    if 21 < lon < 28.5:
        return lon
    return np.nan

def validate_lat(lat):
    """Return ``lat`` when it lies strictly between 57 and 60, else NaN.

    NaN input also yields NaN because both comparisons are false for it.
    """
    if 57 < lat < 60:
        return lat
    return np.nan
    
# Work on a copy so the raw geocoding table is left untouched.
test = geo.copy()

# Coordinates outside the accepted window become NaN here and are removed by
# the dropna() call below.
test['lat'] = test['lat'].map(validate_lat)
test['lon'] = test['lon'].map(validate_lon)

# remove single point from latvia
test = test.drop(index=test.index[test['place'] == 'Maja 3'])

# Only rows without any missing value are plotted.
clean_geo = test.dropna()

# shapely points are (x, y), i.e. (lon, lat).
geometry = [
    Point(lon, lat)
    for lon, lat in zip(clean_geo['lon'], clean_geo['lat'])
]
gdf = gpd.GeoDataFrame(geometry=geometry, crs="epsg:4326")
gdf['title'] = list(clean_geo['place'])
# Make sure every column label is a string.
gdf = gdf.rename(columns=str)

a = gdf.explore(
    marker_type='circle',
    marker_kwds={'radius': 200, 'fill': True},
    legend=False,
)

a

# 2. Places marked with different markers to indicate bad and good results

In [3]:
# Test records whose `compilance` flag is 0 are treated as failed tests.
problematic_places = df[df['compilance'] == 0]

# Build the lookup set once so each membership test is O(1) instead of
# re-creating and scanning a list for every place.
bad_place_names = set(problematic_places.place)

# One label per row of clean_geo, in row order.
res = ['bad' if place in bad_place_names else 'good' for place in clean_geo.place]

In [4]:
# Attach the 'bad'/'good' label to every place. `res` is a plain list built by
# iterating clean_geo.place, so it is assigned by position and lines up with
# clean_geo's row order.
clean_geo['bad_water'] = res

In [5]:
import geopandas as gpd
from shapely.geometry import Point, Polygon, box, LineString
import pandas as pd

# One point per place; shapely points are (x, y), i.e. (lon, lat).
geometry = [Point(lon, lat) for lon, lat in zip(clean_geo.lon, clean_geo.lat)]
gdf = gpd.GeoDataFrame(geometry=geometry, crs="epsg:4326")
gdf['title'] = list(clean_geo.place)
gdf['status'] = list(clean_geo.bad_water)
# Make sure every column label is a string.
gdf = gdf.rename(columns=str)


a = gdf.explore(
    column='status',
    # Pin the category order so 'bad' is always red and 'good' always green,
    # even if one of the two labels does not occur in the data.
    categories=['bad', 'good'],
    cmap=['red', 'green'],
    # No `color` argument: marker colours come from `column` + `cmap`.
    marker_type='circle',
    legend=True,
    marker_kwds={'radius': 200, 'fill': True},
)

a

# 3. Map with bad results

In [6]:
# Places with at least one failed test. An explicit copy is taken because new
# columns are added to this frame later on, and assigning into a filtered
# slice of clean_geo is a chained-assignment hazard.
negative_geo_res = clean_geo[clean_geo.bad_water == 'bad'].copy()

In [7]:
def get_years_and_problems(place_name):
    """Summarise the recorded problems and their years for one place.

    Reads the module-level ``df`` and keeps only the rows for ``place_name``
    that have no missing value in any column.

    Args:
        place_name: Value to match against the ``place`` column of ``df``.

    Returns:
        A ``(problems, years)`` tuple of comma-separated strings. Note the
        order: problems first, years second. Each distinct value appears once
        and values are sorted, so the output is deterministic. Both strings
        are empty when no row matches.
    """
    case = df[df.place == place_name].dropna()
    # Sets remove repeats; sorting makes the joined text stable between runs.
    problems = sorted({str(problem) for problem in case['compilance_problem']})
    years = sorted({str(year) for year in case['year']})
    return (','.join(problems), ','.join(years))
    

In [8]:
# One (problems, years) summary per place labelled 'bad', in clean_geo's row
# order.
bad_place_names = clean_geo.loc[clean_geo.bad_water == 'bad', 'place']
summaries = [get_years_and_problems(place_name) for place_name in bad_place_names]

# Split the summaries into the two parallel lists used by the following cells.
problems = [problem_text for problem_text, _ in summaries]
years = [year_text for _, year_text in summaries]
    

In [9]:
# `years` and `problems` were built by iterating the places labelled 'bad' in
# clean_geo's row order, which is the same filter that produced
# negative_geo_res, so these positional list assignments line up row by row.
negative_geo_res['year'] = years
negative_geo_res['problem'] = problems

In [10]:
# negative_geo_res

In [11]:
import geopandas as gpd
from shapely.geometry import Point, Polygon, box, LineString
import pandas as pd

# One point per place with a failed test; shapely points are (x, y), i.e.
# (lon, lat).
geometry = [
    Point(lon, lat)
    for lon, lat in zip(negative_geo_res.lon, negative_geo_res.lat)
]
gdf = gpd.GeoDataFrame(geometry=geometry, crs="epsg:4326")

# Every attribute is read from negative_geo_res itself, so this cell does not
# depend on helper lists left over from earlier cells.
gdf['title'] = list(negative_geo_res.place)
gdf['status'] = list(negative_geo_res.bad_water)
gdf['problem'] = list(negative_geo_res.problem)
gdf['year'] = list(negative_geo_res.year)
# Make sure every column label is a string.
gdf = gdf.rename(columns=str)


a = gdf.explore(
    column='status',
    cmap=['red'],
    marker_type='circle',
    legend=True,
    marker_kwds={'radius': 200, 'fill': True},
)

a