# Simple Streamlit App

In [1]:
import pandas as pd
import os
from dotenv import load_dotenv

from utils.b2 import B2

In [2]:
# Load variables from a local .env file into the process environment.
# Presumably this is where the B2_* settings read from os.environ below come from.
load_dotenv()

True

## Accessing B2

For more information on the code behind the helper functions in `utils.b2`, take a look at the [Backblaze API documentation](https://www.backblaze.com/b2/docs/python.html).

In [3]:
# Sanity check that the environment is populated; raises KeyError if B2_KEYNAME is unset.
# Only the key *name* is displayed here -- do not echo B2_APPKEY into a cell output.
os.environ['B2_KEYNAME']

'simple-streamlit-app'

In [4]:
# Build the B2 helper from settings held in environment variables,
# so no endpoint or credential is hardcoded in the notebook.
b2 = B2(
    endpoint=os.environ['B2_ENDPOINT'],
    key_id=os.environ['B2_KEYID'],
    secret_key=os.environ['B2_APPKEY'],
)

### Set Bucket

In [7]:
# set connection to the correct bucket
# (the bucket name is read from the B2_BUCKETNAME environment variable; KeyError if unset)
b2.set_bucket(os.environ['B2_BUCKETNAME'])

In [9]:
# Show the key of every object currently in the bucket.
[obj.key for obj in b2.bucket.objects.all()]

['seattle_home_prices.csv']

### Upload Data

In [11]:
# Destination inside the bucket; this could also be a folder.
remote_path = 'seattle_home_prices.csv'
# Relative path of the CSV on the local filesystem.
local_path = './data/seattle_home_prices.csv'

In [10]:
# "overwrite" file, or upload a new one
# Presumably left commented out so that running the notebook does not re-upload the file;
# uncomment the call below to send local_path to remote_path in the bucket.
# b2.to_b2(local_path, remote_path)

### Download Data to pandas

In [35]:
# Read the remote file into a DataFrame via the project helper
# (presumably a pandas DataFrame parsed from the CSV -- see utils.b2 to confirm).
df_prices = b2.to_df(remote_path)

## Training Model

In [23]:
from sklearn.linear_model import LinearRegression
# from sklearn.model_selection import train_test_split

In [37]:
# VERY basic data cleaning: keep only the modelling columns and drop incomplete rows.
features = ['SQUARE FEET', 'BEDS']
target = 'PRICE'

# Select the columns and drop rows with missing values in one chained expression.
# This returns a new frame instead of calling dropna(inplace=True) on a freshly
# sliced frame, which pandas can flag with SettingWithCopyWarning.
df_prices = df_prices[features + [target]].dropna()

In [38]:
# Preview the first rows of the cleaned frame.
# NOTE(review): the saved output below still lists columns (e.g. LATITUDE, LONGITUDE)
# that the cleaning cell drops, so it looks stale -- re-run top to bottom to refresh it.
df_prices.head()

Unnamed: 0,SQUARE FEET,BEDS,LATITUDE,LONGITUDE,PRICE
0,760.0,2,47.631632,-122.347049,459000
1,1170.0,3,47.667321,-122.32326,648000
2,1510.0,3,47.504199,-122.360753,440000
3,1820.0,3,47.67905,-122.30142,715000
4,1013.0,1,47.622291,-122.339972,749900


In [41]:
# Unfitted linear regression estimator.
lm = LinearRegression()

# Inputs are the feature columns; the response is the target column.
X = df_prices[features]
y = df_prices[target]

# Fit on every cleaned row -- no train/test split is made here.
lm.fit(X, y)

In [42]:
# is this a good metric?
# score() is evaluated on the same X, y the model was fit on, so this is an
# in-sample figure (R^2 for LinearRegression), not an estimate of how the model
# performs on data it has not seen.
lm.score(X, y)

0.6515795964985773

## Saving Model

In [29]:
import pickle

In [31]:
# notice the file mode is "wb" (write binary): pickle.dump writes bytes,
# so the file has to be opened in binary mode
with open("./model.pickle", 'wb') as f:
    pickle.dump(lm, f)

## Loading Model

In [32]:
# notice the file mode is "rb" (read binary) here: pickle.load reads bytes
# NOTE: only unpickle files you trust -- pickle.load can execute arbitrary code.
with open("./model.pickle", 'rb') as f:
    lm_ = pickle.load(f)

In [34]:
# The reloaded model is expected to reproduce the original model's score on the same data.
# NOTE(review): the saved outputs disagree (0.6379... here vs 0.6515... for lm) and the
# execution counts are out of order, so at least one output is stale -- Restart & Run All.
lm_.score(X, y)

0.6379477265284501

## Running on Streamlit

Test with `streamlit run app.py`. Use the "Deploy" button to deploy to Streamlit Cloud.