In [1]:
import re
from pathlib import Path
from urllib.parse import quote_plus

import pandas as pd

from scraper import *
from functions import *

In [None]:
# ANSI escape sequences used to highlight the category list in the prompt.
bcolors_blue = '\033[94m'
bcolors_red = '\033[31m'
bold='\033[01m'
reset='\033[0m'

# Human-readable list shown in the prompt.
recipe_cat = 'aperitif, bases, drink, dessert, starter, main or all'
# Exact names accepted from the user. Kept separate from the prompt text so
# validation is a whole-name membership test, not a substring search (which
# would accept fragments such as 'a' or 'or', and empty entries).
VALID_CATEGORIES = frozenset(
    ['aperitif', 'bases', 'drink', 'dessert', 'starter', 'main', 'all'])


def validate_categories(raw):
    """Parse a comma-separated category selection typed by the user.

    Each entry is stripped of surrounding whitespace and lower-cased, then
    checked against ``VALID_CATEGORIES``.

    Args:
        raw: The text entered by the user, e.g. ``'aperitif,drink'``.

    Returns:
        The normalised category names, in the order they were typed.

    Raises:
        ValueError: If any entry is empty or is not a known category.
    """
    names = [part.strip().lower() for part in raw.split(',')]
    if not all(name in VALID_CATEGORIES for name in names):
        raise ValueError('Invalid category. please try again!')
    return names


if __name__ == "__main__":
    print(f'Which category you want to choose? {bold}{bcolors_blue} {recipe_cat}{reset}.\
           Select one or more categories(sepretated by \",\" )\n\n')

    hasKey = None
    category = None

    # Keep prompting until every entry of the selection is a known category.
    while not hasKey:
        try:
            # Re-join the normalised names so collectPath still receives a
            # plain comma-separated string of letters, as it did before.
            category = ','.join(validate_categories(input()))
            hasKey = True
        except ValueError as error:
            print(error)

    hrefs = collectPath(category)

    # One list per output column; the lists are turned into a DataFrame later.
    data = {'title':[], 'total_time':[], 'prep_time':[], 'cook_time':[], 'difficulty':[],
                'comments':[], 'rating':[], 'category':[], 'yield':[],'ingredients':[], 'link':[], 'image':[]}
    for url in hrefs:
        print(url, ' loading ...')
        scraper = Scraper()
        scraper.load(url)
        # Extract every field first and append them together afterwards, so a
        # failure or interrupt part-way through a page does not leave the
        # column lists with different lengths.
        row = {
            'link': url,
            'title': scraper.title(),
            'total_time': scraper.time('total_time'),
            'prep_time': scraper.time('prep_time'),
            'cook_time': scraper.time('cook_time'),
            'difficulty': scraper.difficulty(),
            'comments': scraper.comments(),
            'rating': scraper.rating(),
            'category': scraper.category(),
            'yield': scraper.yields(),
            'ingredients': scraper.ingredients(),
            'image': scraper.image(),
        }
        for column, value in row.items():
            data[column].append(value)

    

Which category you want to choose? [01m[94m aperitif, bases, drink, dessert, starter, main or all[0m.           Select one or more categories(sepretated by "," )




 all


Collect URLs https://www.cuisineaz.com/categories/aperitif-cat48640
Collect URLs https://www.cuisineaz.com/categories/bases-cat48653
Collect URLs https://www.cuisineaz.com/categories/boissons-cat48664
Collect URLs https://www.cuisineaz.com/categories/desserts-cat48681
Collect URLs https://www.cuisineaz.com/categories/entrees-cat48785
Collect URLs https://www.cuisineaz.com/categories/plats-cat48816
https://www.cuisineaz.com/recettes/baguette-aux-champignons-epinards-et-boursin-echalote-ciboulette-110691.aspx  loading ...
https://www.cuisineaz.com/recettes/pull-apart-bread-aux-courgettes-et-au-boursin-onctueux-ail-fines-herbes-110685.aspx  loading ...
https://www.cuisineaz.com/recettes/cones-feuilletes-au-jambon-et-boursin-echalote-ciboulette-110475.aspx  loading ...
https://www.cuisineaz.com/recettes/herisson-aperitif-96981.aspx  loading ...
https://www.cuisineaz.com/recettes/mousse-d-avocats-aux-crevettes-14215.aspx  loading ...
https://www.cuisineaz.com/recettes/mousse-de-thon-legere-

In [None]:
# Show how many values were collected per field instead of dumping every
# scraped record; equal counts also confirm the columns are aligned.
{column: len(values) for column, values in data.items()}

In [None]:
# Turn the per-column lists collected by the scraper into a DataFrame.
df = pd.DataFrame(data)
# sample() raises ValueError when asked for more rows than the frame holds,
# so cap the preview at the number of scraped recipes.
df.sample(min(50, len(df)))

## Data Cleaning

In [None]:
# Print the column dtypes and non-null counts of the freshly scraped frame.
df.info()

In [None]:
# Count missing values per column and show only the columns that have any.
null_cols = df.isna().sum()
null_cols.loc[null_cols.gt(0)]

In [None]:
# Replace missing durations with '0 min'. The result is assigned back to the
# frame: calling fillna(inplace=True) on a column obtained via attribute access
# is a chained in-place operation that may act on a temporary copy and leave
# df unchanged (it never updates df under pandas Copy-on-Write).
time_cols = ['total_time', 'prep_time', 'cook_time']
df[time_cols] = df[time_cols].fillna('0 min')

In [None]:
# Run every duration column through get_minutes, one column at a time.
# NOTE(review): get_minutes presumably comes from the star-imported
# `functions` module and converts a duration string to minutes -- confirm.
for time_col in ('total_time', 'prep_time', 'cook_time'):
    df[time_col] = df[time_col].map(get_minutes)

In [None]:
# Store the comment counts as 64-bit integers.
df['comments'] = df['comments'].astype('int64')

In [None]:
# Map the scraped difficulty and category labels through the rename helpers.
# NOTE(review): rename_difficulty / rename_category are not defined in this
# notebook; presumably they come from the star-imported modules -- confirm.
df['difficulty'] = df['difficulty'].map(rename_difficulty)
df['category'] = df['category'].map(rename_category)

In [None]:
# Preview up to 50 random rows of the cleaned frame; the size is capped
# because sample() raises ValueError when the frame has fewer rows than asked.
df.sample(min(50, len(df)))

In [None]:
# Re-count missing values per column after the transformations above and
# show only the columns that still contain any.
null_cols = df.isna().sum()
null_cols.loc[null_cols.gt(0)]

In [None]:
# Label recipes without a difficulty as 'missing'. Assign the result back:
# fillna(inplace=True) on a column obtained via attribute access is a chained
# in-place operation and may leave df unchanged (it never updates df under
# pandas Copy-on-Write).
df['difficulty'] = df['difficulty'].fillna('missing')

In [None]:
# Display descriptive statistics of the frame after cleaning.
df.describe()

In [None]:
# Final check: one boolean per column, True where missing values remain.
df.isnull().sum().gt(0)


## Export Data

In [None]:
# to_csv does not create missing directories, so make sure the target folder
# exists before writing the cleaned recipes (without the index column).
output_path = Path('output/new_recipes.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

In [None]:
import pymysql
from getpass import getpass
from sqlalchemy import create_engine
from sqlalchemy import inspect

In [None]:
# Connection settings; the password is typed interactively so it is never
# stored in the notebook.
username='root'
password=getpass()
server='localhost'
database='cuisine_az'
# URL-encode the password: characters such as '@', '/' or ':' would otherwise
# be parsed as part of the connection URL structure and break the login.
engine = create_engine(f'mysql+pymysql://{username}:{quote_plus(password)}@{server}/{database}')
# Write the cleaned frame to the new_recipes table, replacing it if it exists.
df.to_sql(name='new_recipes',con=engine, if_exists='replace',index=False)

In [None]:
# Load the whole recipes table from MySQL into a DataFrame and display it.
# NOTE(review): this reads cuisine_az.recipes, whereas the export cell writes
# to new_recipes -- confirm this is the intended table.
data_sql = pd.read_sql_query(sql='SELECT * FROM cuisine_az.recipes', con=engine)
data_sql

In [None]:
# Build a SQLAlchemy inspector on the engine and list the table names it
# reports, e.g. to check that the export above created its table.
inspector = inspect(engine)
inspector.get_table_names()