In [76]:
import os
from dotenv import load_dotenv

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

from snowflake.connector import connect

In [77]:
# Load variables from a local .env file (python-dotenv) so the credential
# lookups further down can read them from the process environment.
load_dotenv()

True

In [78]:
# Snowflake credentials are taken from the environment; each name is None
# when its variable is not set.
username, password, account = (
    os.getenv(name)
    for name in ('SNOWFLAKE_USERNAME', 'SNOWFLAKE_PASSWORD', 'SNOWFLAKE_ACCOUNT')
)

In [79]:
# Open the Snowflake connection with the credentials read from the environment.
con = connect(user=username, password=password, account=account)

In [80]:
# Display the connection object's repr as the cell output.
con

<snowflake.connector.connection.SnowflakeConnection at 0x7f8d98964460>

In [81]:
# Single cursor that is passed to every run_sql call in this notebook.
cursor = con.cursor()

In [82]:
def run_sql(cursor, query, single_row=False, print_results=False):
    """Execute ``query`` on ``cursor`` and return the fetched row(s).

    Args:
        cursor: Object exposing ``execute``, ``fetchone`` and ``fetchall``
            (the notebook passes a Snowflake connector cursor).
        query: SQL text handed to ``cursor.execute`` unchanged.
        single_row: When truthy, return only ``cursor.fetchone()``;
            otherwise return everything from ``cursor.fetchall()``.
        print_results: When truthy, also print the fetched value.

    Returns:
        The value of ``cursor.fetchone()`` if ``single_row`` is truthy,
        else the value of ``cursor.fetchall()``.
    """
    exe = cursor.execute(query)
    print(f"Execution: {exe}")
    # Truthiness checks instead of ``is True`` so that flags such as 1 or
    # numpy.bool_(True) are honoured rather than silently ignored.
    if single_row:
        result = cursor.fetchone()
        if print_results:
            print(f"Result: {result}")
        return result
    results = cursor.fetchall()
    if print_results:
        print(f"Results: {results}")
    return results
    

In [83]:
# Select the warehouse for this session; the fetched status rows are the cell output.
run_sql(cursor, query="USE WAREHOUSE COMPUTE_WH")

Execution: <snowflake.connector.cursor.SnowflakeCursor object at 0x7f8d98964100>


[('Statement executed successfully.',)]

In [84]:
# Select the database for this session; the fetched status rows are the cell output.
run_sql(cursor, query='USE DATABASE AIRBNB_CALENDAR_LISTINGS_SAMPLE')

Execution: <snowflake.connector.cursor.SnowflakeCursor object at 0x7f8d98964100>


[('Statement executed successfully.',)]

In [85]:
# Query joining calendar rows (alias c) to listing rows (alias p) on listing_id
# and on the listing's crawled_at_ds being equal to the calendar row's ds.
# Only rows with a non-null price and currency 'USD' are kept; DISTINCT removes
# duplicate output rows and the result is ordered by price, highest first.
# The LIMIT line is commented out in the SQL, so the full result set is returned.
practice_query = """
                SELECT DISTINCT 
                    c."listing_id", c."price", p."reviews_count", p."city", 
                    p."room_type_category", p."space_type", p."bedrooms", p."bathrooms"
                FROM "AIRBNB_CALENDAR_SAMPLE" c
                JOIN "AIRBNB_LISTINGS_SAMPLE" p ON p."listing_id" = c."listing_id" AND p."crawled_at_ds" = c."ds"
                WHERE 
                    c."price" IS NOT NULL
                        AND
                    c."currency" = 'USD'
                ORDER BY c."price" DESC
                --LIMIT 1000000
                """

In [86]:
# Run the query; run_sql uses fetchall() here, so every row is pulled into memory.
data = run_sql(cursor, query=practice_query)

Execution: <snowflake.connector.cursor.SnowflakeCursor object at 0x7f8d98964100>


In [87]:
# Number of rows fetched by the query.
len(data)

2589842

In [88]:
# DataFrame column labels, listed in the same order as the query's SELECT list.
column_names = [
    'listing_id',
    'price',
    'review_counts',
    'city',
    'room_type_category',
    'space_type',
    'bedrooms',
    'bathrooms',
]

In [89]:
# Wrap the fetched rows in a DataFrame labelled with column_names.
df_airbnb = pd.DataFrame(data=data, columns=column_names)

In [90]:
# Preview the first rows of the DataFrame.
df_airbnb.head()

Unnamed: 0,listing_id,price,review_counts,city,room_type_category,space_type,bedrooms,bathrooms
0,33196476,1983148,81,,entire_home,Entire rental unit,4.0,4.0
1,33196476,1272563,81,,entire_home,Entire rental unit,4.0,4.0
2,40900114,60287,0,San Andrés,private_room,Hotel room,2.0,3.0
3,40900114,60053,0,San Andrés,private_room,Hotel room,2.0,3.0
4,40900114,59829,0,San Andrés,private_room,Hotel room,2.0,3.0


In [92]:
# Correlation between the price and review_counts columns.
col1 = "price"
col2 = "review_counts"
corr = df_airbnb[col1].corr(df_airbnb[col2])
print(corr)

-0.01663597606288046


In [93]:
# Correlation between the price and bedrooms columns.
col1 = "price"
col2 = "bedrooms"
corr = df_airbnb[col1].corr(df_airbnb[col2])
print(corr)

0.06512141814911311


In [94]:
# Correlation between the price and bathrooms columns.
col1 = "price"
col2 = "bathrooms"
corr = df_airbnb[col1].corr(df_airbnb[col2])
print(corr)

0.05741000671447289
