In [None]:
import os
from datetime import datetime
from functools import partial

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from dotenv import load_dotenv
from haversine import haversine, Unit
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL

In [None]:
# Load the database settings from the local env file.
# `override=True` lets the file win over variables that already exist in the
# process environment. By default python-dotenv keeps pre-existing values, and
# names as generic as USERNAME (always defined on Windows), HOST or PORT are
# frequently set already, so the connection could silently use the wrong
# credentials or endpoint.
load_dotenv('./env.env', override=True)

driver_name = 'postgresql+psycopg2'
# Assemble the connection URL from its parts; a missing setting fails fast with
# a KeyError naming the absent variable.
url = URL.create(
    drivername=driver_name,
    username=os.environ['USERNAME'],
    password=os.environ['PASSWORD'],
    host=os.environ['HOST'],
    port=os.environ['PORT'],
    database=os.environ['DB']
)
engine = create_engine(url)
# A single connection is opened here and stays open for the queries that follow;
# call `db_connection.close()` once the analysis is finished.
db_connection = engine.connect()

In [None]:
# Fetch a small sample of the `custom` table for exploration.
# NOTE(review): there is no ORDER BY, so which 10 rows come back is up to the
# database — confirm that an arbitrary sample is acceptable here.
query = """
select * from custom limit 10
"""

df = pd.read_sql(sql=query, con=db_connection)
# Print column names, dtypes and non-null counts of the sample.
df.info()

In [None]:
# Build one single-row DataFrame per fetched record from the `props` mapping
# nested in its `inspection_document` value: every prop key becomes a column
# holding that record's value.
dfs = [
    pd.DataFrame({key: [value] for key, value in document['props'].items()})
    for document in df['inspection_document']
]

In [None]:
# Stack the per-record frames into a single table with a fresh 0..n-1 index and
# preview the first rows.
df_inpsection_documents = pd.concat(objs=dfs, axis=0, ignore_index=True)
df_inpsection_documents.head(n=5)

In [None]:
def _leading_number(value):
    """Return the leading numeric token of ``value`` as a float, else ``value`` unchanged.

    The value is stringified and split on whitespace; when the first token
    consists only of digits and dots (e.g. ``"12.5 kg"``) that token is parsed
    with ``float``. Falsy values (``None``, ``0``, ``''``, ``False``) and
    anything whose first token is not digit-like are returned untouched.

    Tokens that pass the digit check but are not valid float literals
    (``"1.2.3"``, ``"½"``) are also returned untouched instead of raising
    ``ValueError`` and aborting the whole cell.
    """
    if not value:
        return value
    tokens = str(value).split()
    if not tokens or not tokens[0].replace(".", "").isnumeric():
        return value
    try:
        return float(tokens[0])
    except ValueError:
        # Digit-like but not a number (several dots, vulgar fractions, ...).
        return value


def _to_numeric_or_keep(column):
    """Convert ``column`` with ``pd.to_numeric``; return it unchanged if it cannot be parsed.

    Explicit replacement for ``pd.to_numeric(..., errors='ignore')``, which
    pandas deprecated in 2.2.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Pipeline: parse leading numbers -> map boolean-like strings and '' to 0/1 ->
# coerce each column to a numeric dtype where possible -> keep integer columns.
# NOTE(review): `_leading_number` always yields floats, so a column made up only
# of parsed numbers ends up float-typed and is dropped by
# `select_dtypes(include='int')` — confirm that keeping only integer-typed
# columns (e.g. the 0/1 flags) is the intent.
final_df = (
    df_inpsection_documents
    .map(_leading_number)
    .replace({'FALSE': 0, 'False': 0, 'false': 0, '': 0, 'TRUE': 1, 'True': 1, 'true': 1})
    .apply(_to_numeric_or_keep)
    .select_dtypes(include='int')
)

In [None]:
# Pairwise correlation between the columns of `final_df` (`DataFrame.corr`
# defaults to the Pearson method).
corr = final_df.corr()

f, ax = plt.subplots(figsize=(12, 8))

# Draw on the explicit Axes and configure it through the same object, instead of
# mixing in pyplot's implicit "current axes" calls.
sns.heatmap(corr, cmap="Blues", annot=True, square=False, ax=ax)
ax.set_title('Pearson Correlation of Features')
# Tilt the row labels; unlike `plt.yticks(...)` this returns None, so no tick
# tuple repr is left behind as cell output.
ax.tick_params(axis='y', labelrotation=45)

In [None]:
# Print a concise summary of `final_df`: its columns, dtypes and non-null counts.
final_df.info()