In [None]:
## webdriver-manager downloads and caches the ChromeDriver build matching the installed Chrome, so Selenium always gets a compatible driver.

#pip install webdriver-manager

In [1]:
import datetime
import json
import os
import re
from time import sleep

import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
## Helper functions: fetch the Strainz API metadata as a DataFrame, and convert a timedelta to fractional days.

def get_whole_json_dict():
    """Fetch all records from the Strainz API and return their metadata as a DataFrame.

    Despite the name, the result is a ``pandas.DataFrame`` (not a dict): one
    row per API record, one column per key of that record's ``metadata`` entry.

    Returns:
        pandas.DataFrame: the expanded ``metadata`` objects.

    Raises:
        requests.exceptions.RequestException: if the request fails, times out,
            or the server answers with a 4xx/5xx status.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    page = requests.get('https://api.bsc.strainz.tech/strainz', timeout=30)
    # Surface HTTP errors here instead of as a confusing JSON/KeyError further down.
    page.raise_for_status()
    jsonObject = json.loads(page.text)
    # Each record's 'metadata' value is expanded into its own set of columns.
    characteristics = pd.DataFrame(pd.DataFrame(jsonObject)['metadata'].to_list())
    return characteristics

def time_in_days(td):
    """Convert a ``datetime.timedelta`` into a (possibly fractional) number of days.

    Uses ``total_seconds()`` so that the microsecond component is included
    rather than silently dropped; negative durations yield negative results.

    Args:
        td: the ``datetime.timedelta`` to convert.

    Returns:
        float: the duration expressed in days.
    """
    return td.total_seconds() / (3600 * 24)

In [3]:
## Resolve a chromedriver binary through webdriver-manager and launch Chrome with it.
## NOTE(review): the install() result is passed positionally, which presumably relies on
## the Selenium 3-style `Chrome(executable_path, ...)` signature -- confirm against the
## installed Selenium version.
driver = webdriver.Chrome(ChromeDriverManager().install())

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324
[WDM] - Driver [/Users/null/.wdm/drivers/chromedriver/mac64/88.0.4324.96/chromedriver] found in cache


 


### Initial WebScraping For Plants

In [4]:
## Navigate to the marketplace for Strainz.
driver.get("https://strainz.tech/marketplace/strainz")
## Fixed pause before scraping (presumably to let the page finish rendering its tiles).
sleep(10)

## Locate the xpath that corresponds to the tile container.
## find_elements(By.XPATH, ...) is used instead of find_elements_by_xpath because the
## latter helper is not available in newer Selenium releases.
market = driver.find_elements(By.XPATH, '/html/body/app-root/app-navigation/mat-sidenav-container/mat-sidenav-content/app-strainz-marketplace/section/div[2]')
if not market:
    ## Same exception type the bare market[0] lookup would raise, but with an explanation.
    raise IndexError('Marketplace container not found; the page may not have loaded or its layout changed.')

## Parse the market text with a regular expression and store it into a DataFrame called units_for_sale.
## The pattern is a raw string so that \d reaches the regex engine without an
## invalid-escape warning; \n still matches a newline.
numeric_columns = ['Generation', 'BreedingCost', 'Price', 'Grow']
units_for_sale = pd.DataFrame(
    re.findall(r'(.*)\nGeneration: (\d+)\nBreeding Cost: (\d+)\nBuy for (\d+)\nGrows (\d+)', market[0].text),
    columns=['Name'] + numeric_columns,
)

## re.findall returns strings; convert the captured digits to integers.
units_for_sale[numeric_columns] = units_for_sale[numeric_columns].astype(int)

units_for_sale['PricePerBreedingCost'] = units_for_sale['Price'] / units_for_sale['BreedingCost']
units_for_sale['PricePerGrow'] = units_for_sale['Price'] / units_for_sale['Grow']
## Same ratio as PricePerGrow.
## NOTE(review): presumably "Grows" is a per-day yield, which would make this the number
## of days needed to earn back the price -- confirm.
units_for_sale['DaysToCover'] = units_for_sale['Price'] / units_for_sale['Grow']

### Add API data to determine amount harvestable on purchase.

In [5]:
## Capture the ids and store them as dna in the units_for_sale DataFrame.
## The last path segment of each card image's src is used as the dna id.
ids = [
    image.get_attribute('src').split('/')[-1]
    for image in driver.find_elements(By.CLASS_NAME, 'mat-card-image')
]
if len(ids) != len(units_for_sale):
    ## pandas would raise a ValueError here anyway; this one says what went wrong.
    raise ValueError(f'Scraped {len(ids)} image ids but parsed {len(units_for_sale)} listings; cannot align dna with listings.')

units_for_sale['dna'] = ids

## Pull the API metadata and make the harvest timestamps integer epoch seconds.
harvest_dict = get_whole_json_dict()

harvest_dict['lastHarvest'] = harvest_dict['lastHarvest'].astype(int)

## Most recent harvest per dna, looked up once instead of re-filtering the API frame per listing.
latest_harvest = units_for_sale['dna'].map(harvest_dict.groupby('dna')['lastHarvest'].max())

missing_dna = units_for_sale.loc[latest_harvest.isna(), 'dna'].unique().tolist()
if missing_dna:
    ## Previously this surfaced as "max() arg is an empty sequence".
    raise ValueError(f'No API record found for dna id(s): {missing_dna}')

## Days elapsed since the last harvest, measured against a single "now" so every row
## uses the same reference instant. The column is created as floats directly rather
## than being initialised to integer 0 and overwritten row by row.
now = datetime.datetime.today()
units_for_sale['HarvestableTime'] = [
    time_in_days(now - datetime.datetime.fromtimestamp(timestamp))
    for timestamp in latest_harvest
]

units_for_sale['HarvestUnits'] = units_for_sale['HarvestableTime'] * units_for_sale['Grow']

In [6]:
## Display the enriched listings table (the rendered output below elides the middle rows).
units_for_sale

Unnamed: 0,Name,Generation,BreedingCost,Price,Grow,PricePerBreedingCost,PricePerGrow,DaysToCover,dna,HarvestableTime,HarvestUnits
0,Cacao Diablo,3,1000,5500,220,5.50000,25.000000,25.000000,1111000032064064,0.171921,37.822685
1,Chemical Kush,0,2000,7400,227,3.70000,32.599119,32.599119,3213000198217187,0.713160,161.887257
2,Auto Amnesia,0,4000,12420,227,3.10500,54.713656,54.713656,4322000059080094,0.016701,3.791215
3,Auto Kush,1,1000,7420,197,7.42000,37.664975,37.664975,2115000172112058,0.016701,3.290174
4,White Kush,0,2000,8420,189,4.21000,44.550265,44.550265,1713000036099155,0.016701,3.156563
...,...,...,...,...,...,...,...,...,...,...,...
56,Gelato Alarm,3,2000,8000,289,4.00000,27.681661,27.681661,1762000000000000,0.915428,264.558762
57,Gelato Trinity,2,4000,8000,305,2.00000,26.229508,26.229508,7111000064000128,0.915428,279.205613
58,Hawaiian Kush,2,1000,25000,320,25.00000,78.125000,78.125000,2111000000000000,1.317025,421.448148
59,Exodus Croppa,1,4000,3999,166,0.99975,24.090361,24.090361,5354000032117130,0.122442,20.325394


### Fit a Linear Model To Marketplace

In [8]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [9]:
## Perform one-hot encoding on the generation.

ohe_df = pd.get_dummies(units_for_sale['Generation'])
final_df = pd.concat([units_for_sale, ohe_df], axis=1)

## Drop the target, the identifiers and the derived price/harvest columns in one pass;
## what remains (BreedingCost, Grow and the one-hot generation columns) is the feature set.
non_feature_columns = [
    'Name', 'Generation', 'dna',
    'PricePerBreedingCost', 'PricePerGrow', 'DaysToCover',
    'HarvestableTime', 'HarvestUnits',
    'Price',
]
## Force a float matrix so X is numeric regardless of the dtype get_dummies produces
## (mixing integer and boolean columns would otherwise yield an object array).
X = final_df.drop(non_feature_columns, axis=1).to_numpy(dtype=float)
Y = final_df['Price'].to_numpy()

## Ordinary least squares fit of Price on the features above.
reg = LinearRegression().fit(X, Y)