In [11]:
import sqlite3
import pandas as pd
import geopandas as gpd


In [12]:
sql_query = '''
SELECT
    rl.community,
    rl."type",
    rl.beds,
	rl.has_den,
    rl.sq_feet, 
	rl.baths,
	rl.cats,
	rl.dogs,
    rl.price,
    rl.latitude,
    rl.longitude,
    ROUND(PERCENT_RANK() OVER (ORDER BY c.crime_count),2) AS crime_percentile,
    (SELECT s.name FROM schools s
        JOIN school_ranking sr ON sr.school_id = s.school_id
        WHERE sr.school_id = swwz.school_id
        ORDER BY sr.school_rating DESC
        LIMIT 1) AS highest_rated_school_name,
    MAX(sr.school_rating) AS highest_school_rating,
  (SELECT sr.school_rank
     FROM school_ranking sr 
     WHERE sr.school_id = swwz.school_id
     ORDER BY sr.school_rating DESC
     LIMIT 1) AS highest_school_rank,    
    CASE 
        WHEN sl.school_id IS NOT NULL THEN 'Required'
        ELSE 'Not Required'
    END AS lottery_requirement,
    ('https://www.rentfaster.ca'|| rl.link) AS link
FROM
    rental_listings rl
    INNER JOIN schools_within_walk_zone swwz ON swwz.listing_id = rl.id
    INNER JOIN listing_with_crime lwc ON lwc.listing_id = rl.id
    INNER JOIN community_crime c ON c.id = lwc.crime_id
    INNER JOIN schools s ON s.school_id = swwz.school_id
    INNER JOIN school_ranking sr ON sr.school_id = s.school_id
    LEFT JOIN school_lottery sl ON sl.school_id = s.school_id 
WHERE
    sr.school_group = 'elementary'
    AND rl.is_active = True
GROUP BY
    rl.id
ORDER BY
    highest_school_rating DESC,
    price ASC
'''

In [13]:
with sqlite3.connect('../database.db') as conn:
    df_tb = pd.read_sql_query(sql_query,conn)

In [14]:
gdf_tb = gpd.GeoDataFrame(df_tb,geometry=gpd.points_from_xy(df_tb['longitude'], df_tb['latitude'], crs="EPSG:4326"))

In [15]:
gdf_tb.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1548 entries, 0 to 1547
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype   
---  ------                     --------------  -----   
 0   community                  1548 non-null   object  
 1   type                       1548 non-null   object  
 2   beds                       1523 non-null   float64 
 3   has_den                    1548 non-null   int64   
 4   sq_feet                    1215 non-null   float64 
 5   baths                      1548 non-null   float64 
 6   cats                       1548 non-null   int64   
 7   dogs                       1548 non-null   int64   
 8   price                      1548 non-null   int64   
 9   latitude                   1548 non-null   float64 
 10  longitude                  1548 non-null   float64 
 11  crime_percentile           1548 non-null   float64 
 12  highest_rated_school_name  1548 non-null   object  
 13  highest_school_rating    

In [16]:
gdf_tb.reset_index(inplace=True)
gdf_tb["row_id"] = gdf_tb.index + 1
gdf_tb.reset_index(drop=True, inplace=True)
gdf_tb.set_index("row_id", inplace = True)

In [19]:
gdf_tb[gdf_tb['community']=='Mahogany']

Unnamed: 0_level_0,index,community,type,beds,has_den,sq_feet,baths,cats,dogs,price,latitude,longitude,crime_percentile,highest_rated_school_name,highest_school_rating,highest_school_rank,lottery_requirement,link,geometry
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1


In [17]:
gdf_tb.to_file('tableau_export.geojson',driver = 'GeoJSON')