### Import packages

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import re
from urllib.request import urlopen
from tqdm import tqdm
import random
import time
from selenium import webdriver

### Global variables

In [2]:
categories = {'sport': 'adventure-and-sport',
             'music': 'music',
             'film': 'film-and-video',
             'theatre': 'live-performance'}

### Functions

In [3]:
# Create an url
def create_url(page, category):
    _filter = '&categories[' + categories[category] + ']=on'
    return 'https://www.kisskissbankbank.com/en/discover?project[successful]=on' + _filter + '&page=' + str(page)

In [4]:
# Get a page from url
def get_page(url):
    ok = False
    page = requests.get(url)
    time.sleep(5)
    page = requests.get(url)
    if(page.status_code == 200):
        ok = True
    bs = BeautifulSoup(page.text, 'html.parser')
    return bs, ok

In [5]:
# Get the number of the projects from bs
def get_num_of_projects(bs):
    total_projects = bs.findAll('span', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-color-font1 k-u-size-tiny k-u-weight-regular')
    return int(total_projects[0].contents[0].split(' ')[0].replace(',','')) 

In [6]:
# Get the number of pages from bs
def get_num_of_pages(bs):
    num_pro = get_num_of_projects(bs)
    return int(num_pro/9) + 1

In [12]:
# Function to go to project page and get information
def go_to_project(url_pro):
    
    # Load the website by driver
    chrome_path = './chromedriver.exe'
    driver = webdriver.Chrome(chrome_path)
    driver.get(url_pro)
    time.sleep(5)
    
    # Accept the cookie
    cookie_question = driver.find_element_by_xpath("//*[@id=\"App-react-component\"]/div[1]/div/div/div/div/button[2]")
    cookie_question.click()
    
    # Load the html page
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')
    
    # Calculate num_news, num_comments, num_contributions
    infos = bs.findAll('a', 'k-NavBar__link')
    news_bs = infos[2].findAll('span', 'badge__StyledBadge-sc-7liuod-0 eucluz k-Badge k-Badge--spaced')
    comments_bs = infos[3].findAll('span', 'badge__StyledBadge-sc-7liuod-0 eucluz k-Badge k-Badge--spaced')
    contributions_bs = infos[4].findAll('span', 'badge__StyledBadge-sc-7liuod-0 eucluz k-Badge k-Badge--spaced')
    
    if(len(news_bs) == 0):
        num_news = 0
    else:
        num_news = int(news_bs[0].contents[0])

    if(len(comments_bs) == 0):
        num_comments = 0
    else:
        num_comments = int(comments_bs[0].contents[0])

    if(len(contributions_bs) == 0):
        num_contributions = 0
    else:
        num_contributions = int(contributions_bs[0].contents[0])
    
    # Calculate num_rewards, min_price, max_price, and backers each price
    rewards = bs.findAll('div','marger__StyledMarger-sc-1qqifp5-0 kXGmDY')
    prices = []
    backers = []
    for j in range(len(rewards)):
        rj = rewards[j]
        price_j = rj.findAll('h2', 'title__StyledTitle-sc-46lshq-0 edmigd k-RewardCard__title k-u-margin-bottom-double k-u-margin-top-none')
        if(len(price_j) == 0):
            break
        price_j = float(price_j[0].contents[0].replace('€','').replace(',',''))
        prices.append(price_j)
        backers_j = rj.findAll('span', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-color-font1 k-u-size-micro k-u-weight-regular k-RewardCard__infos k-RewardCard__infos--hasBottomMargin k-RewardCard__infos--disabled')
        bj = 0
        if(len(backers_j) != 0 and backers_j[0].contents[0].replace(' ','').isnumeric()):
            bj = int(backers_j[0].contents[0])
        backers.append(bj)
    prices_np = np.array(prices)
    backers_np = np.array(backers)
    if(len(prices_np) == 0):
        min_price = -1
        max_price = -1
    else:
        min_price = np.min(prices_np)
        max_price = np.max(prices_np)
    num_rewards = len(prices_np)
    #print(backers_np)
    
    # Calculate num_creator_projects
    url_owner = bs.findAll('a', 'owner-info__StyledOwnerGrid-tqxc8c-0 jnrkmW')[0].get('href')
    driver_owner = webdriver.Chrome(chrome_path)
    driver_owner.get('https://www.kisskissbankbank.com'+ url_owner)
    time.sleep(5)
    cookie_question_owner = driver_owner.find_element_by_xpath('//*[@id="Header-react-component"]/div[1]/div/div/div/div/button[2]')
    cookie_question_owner.click()
    html_owner = driver_owner.page_source
    bs_owner = BeautifulSoup(html_owner, 'html.parser')
    num_creator_projects = int(bs_owner.findAll('span', 'k-Badge k-HorizontalNav__badge')[0].contents[0])
    #print(num_creator_projects)
    
    driver_owner.quit()
    driver.quit()
    return num_news, num_comments, num_contributions, num_rewards, min_price, max_price, prices_np, backers_np, num_creator_projects

In [37]:
# Function to collect all features
def collect_data(start = 1, end = 2, category = 'music'):
    samples = []
    reward_prices = []
    count = (start - 1)*9
    for i in tqdm(range(start,end)):
        url_i = create_url(i,category)
        #print(url_i)
        #bs_i, ok = get_page(url_i)
        # Load the website by driver - First time
        chrome_path = './chromedriver.exe'
        driver_i = webdriver.Chrome(chrome_path)
        driver_i.get(url_i)
        time.sleep(10)

        # Accept the cookie
        cookie_question_i = driver_i.find_element_by_xpath("//*[@id=\"App-react-component\"]/div[1]/div/div/div/div/button[2]")
        cookie_question_i.click()

        # Load the html page
        html_i = driver_i.page_source
        bs_i = BeautifulSoup(html_i, 'html.parser')
        
        projects = bs_i.findAll('a', 'styles__StyledCrowdfundingCard-sc-1dxuhb7-0 dOcwdr k-CrowdfundingCard k-Card k-Card--light k-Card--withoutBoxShadowOnHover k-CrowdfundingCard--titlesMinHeight')
        for p in tqdm(projects):
            # Extract is_successful
            is_successful = p.findAll('span','state-badge__StyledState-sc-1ixvpgd-0 efdutF')[0].contents[1]

            # Extract percentage_fund
            percentage_fund = p.findAll('span', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-color-font1 k-u-size-micro k-u-weight-regular k-CrowdfundingCard__progressBar__percent k-u-hidden@s-down')[0].contents[0]

            # Extract funded, target, in_2020, backers
            infos = p.findAll('div', 'k-CrowdfundingCard__informations__infoContainer')
            funded = infos[2].findAll('strong', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-weight-regular info__StyledText-h7ofxb-0 jfWZBp')[0].contents[0]
            target = infos[2].findAll('span', 'k-u-hidden@s-down')[0].contents[0]
            in_2020 = infos[1].findAll('strong', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-weight-regular info__StyledText-h7ofxb-0 jfWZBp')[0].contents[0]
            backers = p.findAll('strong', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-weight-regular info__StyledText-h7ofxb-0 jfWZBp')[0].contents[0]

            # Extract category

            # Extract thumbnail type
            img = p.findAll('div','k-CrowdfundingCard__image__imageContainer')[0]
            thumbnail_type = img.find('img').get('src').split('.')[-1]

            # Extract remaining features
            url_pro = p.get('href')
            num_news, num_comments, num_contributions, num_rewards, min_price, max_price, prices_np, backers_np, num_creator_projects = go_to_project(url_pro)

            #print(num_news, num_comments, num_contributions,num_rewards, min_price, max_price)
            #print(backers_np)
            #print(prices_np)
            #print(num_creator_projects)
            #print('----------------------------------')
            sample_i = [
                count,
                is_successful,
                percentage_fund,
                funded,
                target,
                in_2020,
                backers,
                category,
                thumbnail_type,
                num_rewards,
                min_price,
                max_price,
                num_news,
                num_comments,
                num_contributions,
                num_creator_projects
            ]
            print(sample_i)
            for j in range(len(prices_np)):
                rp = [count, category, prices_np[j], backers_np[j]]
                reward_prices.append(rp)
            samples.append(sample_i)
            count += 1
            time.sleep(random.random()*2)
        driver_i.quit()
        time.sleep(random.random()*5)
    return samples, reward_prices


### Tests

In [72]:
url = create_url(2,'music')
bs, ok = get_page(url)
url, ok

('https://www.kisskissbankbank.com/en/discover?project[successful]=on&categories[music]=on&page=2',
 True)

In [11]:
get_num_of_projects(bs)

5679

In [12]:
num_pages = get_num_of_pages(bs)
num_pages

632

In [None]:
data, rewards = collect_data(7,8,'theatre')

0


### Main

In [44]:
features_data = [
    'id',
    'is_successful',
    'percentage_fund',
    'funded',
    'target',
    'in_2020',
    'backers',
    'category',
    'thumbnail_type',
    'num_rewards',
    'min_price',
    'max_price',
    'num_news',
    'num_comments',
    'num_contributions',
    'num_creator_projects'
]
features_rewards = [
    'id',
    'category',
    'price',
    'backers'
]

In [41]:
data, rewards = collect_data(1,12,'theatre')

  0%|          | 0/11 [00:00<?, ?it/s]
  0%|          | 0/9 [00:00<?, ?it/s][A

[0, 'successful', '100 %', '€4,005', 'Out of €4,000', '12/16/2020', '15', 'theatre', 'jpg', 9, 5.0, 600.0, 0, 3, 18, 1]



 11%|█         | 1/9 [00:37<04:56, 37.03s/it][A

[1, 'successful', '131 %', '€7,859', 'Out of €6,000', '12/16/2020', '40', 'theatre', 'jpeg', 7, 20.0, 500.0, 0, 6, 43, 1]



 22%|██▏       | 2/9 [01:13<04:18, 36.88s/it][A

[2, 'successful', '104 %', '€4,250', 'Out of €4,100', '12/16/2020', '58', 'theatre', 'png', 7, 5.0, 1000.0, 0, 10, 60, 1]



 33%|███▎      | 3/9 [02:13<04:23, 43.90s/it][A

[3, 'successful', '103 %', '€7,761', 'Out of €7,500', '11/28/2020', '52', 'theatre', 'jpg', 7, 25.0, 1000.0, 0, 13, 52, 1]



 44%|████▍     | 4/9 [02:51<03:29, 41.94s/it][A

[4, 'successful', '118 %', '€3,080', 'Out of €2,600', '11/9/2020', '61', 'theatre', 'jpg', 7, 10.0, 1500.0, 5, 12, 61, 6]



 56%|█████▌    | 5/9 [03:27<02:41, 40.35s/it][A

[5, 'successful', '101 %', '€3,030', 'Out of €3,000', '11/7/2020', '26', 'theatre', 'jpeg', 3, 10.0, 50.0, 1, 4, 26, 1]



 67%|██████▋   | 6/9 [04:03<01:56, 38.88s/it][A

[6, 'successful', '100 %', '€3,352', 'Out of €3,350', '10/31/2020', '32', 'theatre', 'jpg', 5, 20.0, 500.0, 0, 1, 33, 1]



 78%|███████▊  | 7/9 [04:39<01:16, 38.13s/it][A

[7, 'successful', '114 %', '€5,369', 'Out of €4,700', '10/3/2020', '103', 'theatre', 'jpg', 7, 5.0, 300.0, 0, 15, 104, 1]



 89%|████████▉ | 8/9 [05:17<00:37, 37.98s/it][A

[8, 'successful', '112 %', '€11,196', 'Out of €10,000', '9/8/2020', '60', 'theatre', 'jpg', 7, 10.0, 1000.0, 1, 16, 63, 1]



100%|██████████| 9/9 [05:53<00:00, 39.26s/it][A
  9%|▉         | 1/11 [06:20<1:03:24, 380.46s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

[9, 'successful', '125 %', '€6,250', 'Out of €5,000', '9/7/2020', '101', 'theatre', 'JPG', 5, 15.0, 100.0, 0, 16, 102, 1]



 11%|█         | 1/9 [00:36<04:49, 36.16s/it][A

[10, 'successful', '118 %', '€4,735', 'Out of €4,000', '8/31/2020', '38', 'theatre', 'jpg', 6, 10.0, 100.0, 0, 2, 39, 1]



 22%|██▏       | 2/9 [01:13<04:15, 36.50s/it][A

[11, 'successful', '101 %', '€3,430', 'Out of €3,400', '8/11/2020', '60', 'theatre', 'jpeg', 11, 5.0, 800.0, 0, 19, 68, 1]



 33%|███▎      | 3/9 [01:50<03:39, 36.65s/it][A

[12, 'successful', '109 %', '€6,200', 'Out of €5,700', '8/5/2020', '78', 'theatre', 'jpg', 7, 10.0, 400.0, 7, 19, 81, 1]



 44%|████▍     | 4/9 [02:27<03:03, 36.71s/it][A

[13, 'successful', '106 %', '€4,455', 'Out of €4,200', '7/24/2020', '32', 'theatre', 'jpg', 6, 10.0, 1000.0, 1, 3, 32, 1]



 56%|█████▌    | 5/9 [03:06<02:29, 37.32s/it][A

[14, 'successful', '101 %', '€5,035', 'Out of €5,000', '7/18/2020', '72', 'theatre', 'png', 8, 5.0, 500.0, 0, 16, 77, 1]



 67%|██████▋   | 6/9 [03:44<01:52, 37.51s/it][A

[15, 'successful', '106 %', '€10,647', 'Out of €10,000', '6/29/2020', '241', 'theatre', 'jpg', 7, 5.0, 150.0, 3, 16, 244, 1]



 78%|███████▊  | 7/9 [04:19<01:13, 36.98s/it][A
 89%|████████▉ | 8/9 [04:55<00:36, 36.61s/it][A

[16, 'successful', '111 %', '€7,570', 'Out of €6,800', '6/24/2020', '94', 'theatre', 'jpg', 8, 10.0, 2000.0, 0, 31, 94, 1]
[17, 'successful', '104 %', '€9,985', 'Out of €9,600', '6/20/2020', '164', 'theatre', 'jpg', 13, 10.0, 1650.0, 3, 38, 169, 1]



100%|██████████| 9/9 [05:31<00:00, 36.87s/it][A
 18%|█▊        | 2/11 [12:21<56:12, 374.69s/it]  
  0%|          | 0/9 [00:00<?, ?it/s][A

[18, 'successful', '132 %', '€5,270', 'Out of €4,000', '6/17/2020', '80', 'theatre', 'jpg', 11, 10.0, 3000.0, 1, 22, 88, 1]



 11%|█         | 1/9 [00:38<05:07, 38.44s/it][A

[19, 'successful', '111 %', '€16,599', 'Out of €15,000', '6/16/2020', '239', 'theatre', 'jpg', 7, 10.0, 1000.0, 4, 51, 241, 1]



 22%|██▏       | 2/9 [01:14<04:23, 37.63s/it][A

[20, 'successful', '100 %', '€6,020', 'Out of €6,000', '6/8/2020', '36', 'theatre', 'jpg', 11, 10.0, 10000.0, 0, 2, 41, 1]



 33%|███▎      | 3/9 [01:51<03:46, 37.67s/it][A

[21, 'successful', '227 %', '€6,819', 'Out of €3,000', '6/2/2020', '241', 'theatre', 'png', 7, 5.0, 150.0, 2, 54, 243, 1]



 44%|████▍     | 4/9 [02:28<03:07, 37.47s/it][A
 56%|█████▌    | 5/9 [03:05<02:29, 37.33s/it][A

[22, 'successful', '147 %', '€17,595', 'Out of €12,000', '5/28/2020', '170', 'theatre', 'jpg', 6, 10.0, 3000.0, 9, 49, 177, 1]
[23, 'successful', '105 %', '€6,305', 'Out of €6,000', '5/18/2020', '81', 'theatre', 'jpg', 5, 10.0, 1000.0, 0, 6, 93, 1]



 67%|██████▋   | 6/9 [03:42<01:50, 36.96s/it][A
 78%|███████▊  | 7/9 [04:20<01:14, 37.50s/it][A

[24, 'successful', '108 %', '€4,300', 'Out of €4,000', '5/15/2020', '69', 'theatre', 'jpg', 5, 10.0, 200.0, 2, 12, 72, 1]
[25, 'successful', '100 %', '€4,010', 'Out of €4,000', '5/9/2020', '40', 'theatre', 'jpg', 5, 5.0, 400.0, 0, 7, 43, 1]



 89%|████████▉ | 8/9 [04:56<00:36, 36.85s/it][A

[26, 'successful', '114 %', '€9,145', 'Out of €8,000', '5/6/2020', '109', 'theatre', 'jpg', 9, 10.0, 1500.0, 2, 12, 115, 1]



100%|██████████| 9/9 [05:31<00:00, 36.85s/it][A
 27%|██▋       | 3/11 [18:18<49:13, 369.21s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

[27, 'successful', '237 %', '€4,265', 'Out of €1,800', '5/1/2020', '67', 'theatre', 'png', 8, 5.0, 1800.0, 4, 24, 69, 1]



 11%|█         | 1/9 [00:37<05:03, 37.90s/it][A

[28, 'successful', '105 %', '€7,350', 'Out of €7,000', '4/25/2020', '80', 'theatre', 'jpg', 9, 20.0, 800.0, 8, 4, 84, 1]



 22%|██▏       | 2/9 [01:16<04:26, 38.05s/it][A

[29, 'successful', '105 %', '€3,160', 'Out of €3,000', '4/10/2020', '62', 'theatre', 'jpg', 6, 10.0, 150.0, 0, 6, 82, 1]



 33%|███▎      | 3/9 [01:53<03:47, 37.89s/it][A

[30, 'successful', '109 %', '€5,464', 'Out of €5,000', '4/5/2020', '101', 'theatre', 'jpg', 6, 30.0, 450.0, 0, 29, 103, 1]



 44%|████▍     | 4/9 [02:30<03:07, 37.48s/it][A

[31, 'successful', '120 %', '€3,610', 'Out of €3,000', '3/21/2020', '39', 'theatre', 'png', 7, 10.0, 200.0, 0, 5, 43, 2]



 56%|█████▌    | 5/9 [03:06<02:28, 37.18s/it][A

[32, 'successful', '109 %', '€6,515', 'Out of €6,000', '3/10/2020', '57', 'theatre', 'jpg', 8, 5.0, 1450.0, 13, 5, 63, 2]



 67%|██████▋   | 6/9 [03:43<01:50, 36.91s/it][A

[33, 'successful', '105 %', '€10,471', 'Out of €10,000', '3/8/2020', '148', 'theatre', 'png', 7, 10.0, 600.0, 4, 17, 150, 1]



 78%|███████▊  | 7/9 [04:19<01:13, 36.64s/it][A

[34, 'successful', '110 %', '€4,848', 'Out of €4,400', '3/7/2020', '70', 'theatre', 'jpg', 5, 10.0, 300.0, 12, 18, 75, 1]



 89%|████████▉ | 8/9 [04:54<00:36, 36.39s/it][A

[35, 'successful', '111 %', '€5,570', 'Out of €5,000', '2/19/2020', '84', 'theatre', 'jpg', 5, 10.0, 150.0, 0, 9, 97, 1]



100%|██████████| 9/9 [05:32<00:00, 36.89s/it][A
 36%|███▋      | 4/11 [24:15<42:40, 365.77s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

[36, 'successful', '111 %', '€4,440', 'Out of €4,000', '2/17/2020', '88', 'theatre', 'png', 8, 10.0, 500.0, 0, 8, 93, 5]



 11%|█         | 1/9 [00:37<05:02, 37.76s/it][A

[37, 'successful', '112 %', '€11,160', 'Out of €10,000', '2/7/2020', '134', 'theatre', 'png', 8, 10.0, 750.0, 0, 17, 138, 1]



 22%|██▏       | 2/9 [01:14<04:22, 37.54s/it][A

[38, 'successful', '121 %', '€4,714', 'Out of €3,888', '2/4/2020', '58', 'theatre', 'JPG', 3, 30.0, 100.0, 0, 12, 63, 1]



 33%|███▎      | 3/9 [01:50<03:41, 36.92s/it][A
 44%|████▍     | 4/9 [02:25<03:02, 36.56s/it][A

[39, 'successful', '106 %', '€3,170', 'Out of €3,000', '2/3/2020', '54', 'theatre', 'jpg', 10, 5.0, 200.0, 2, 7, 68, 1]
[40, 'successful', '102 %', '€5,105', 'Out of €5,000', '2/2/2020', '41', 'theatre', 'jpg', 11, 10.0, 50000.0, 0, 12, 44, 2]



 56%|█████▌    | 5/9 [03:03<02:27, 36.92s/it][A

[41, 'successful', '119 %', '€3,580', 'Out of €3,000', '1/25/2020', '35', 'theatre', 'png', 9, 5.0, 200.0, 0, 4, 35, 1]



 67%|██████▋   | 6/9 [03:41<01:51, 37.19s/it][A

[42, 'successful', '117 %', '€3,495', 'Out of €3,000', '1/24/2020', '41', 'theatre', 'jpg', 12, 10.0, 500.0, 0, 29, 64, 1]



 78%|███████▊  | 7/9 [04:19<01:14, 37.30s/it][A

[43, 'successful', '101 %', '€3,040', 'Out of €3,000', '1/22/2020', '43', 'theatre', 'jpg', 6, 5.0, 250.0, 0, 9, 46, 1]



 89%|████████▉ | 8/9 [04:56<00:37, 37.22s/it][A

[44, 'successful', '118 %', '€5,881', 'Out of €5,000', '1/3/2020', '85', 'theatre', 'jpg', 7, 10.0, 500.0, 0, 16, 88, 1]



100%|██████████| 9/9 [05:36<00:00, 37.35s/it][A
 45%|████▌     | 5/11 [30:15<36:23, 363.95s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

[45, 'successful', '100 %', '€4,005', 'Out of €4,000', '12/16/2020', '15', 'theatre', 'jpg', 9, 5.0, 600.0, 0, 3, 18, 1]



 11%|█         | 1/9 [00:37<04:56, 37.01s/it][A

[46, 'successful', '131 %', '€7,859', 'Out of €6,000', '12/16/2020', '40', 'theatre', 'jpeg', 7, 20.0, 500.0, 0, 6, 43, 1]



 22%|██▏       | 2/9 [01:16<04:24, 37.77s/it][A

[47, 'successful', '104 %', '€4,250', 'Out of €4,100', '12/16/2020', '58', 'theatre', 'png', 7, 5.0, 1000.0, 0, 10, 60, 1]



 33%|███▎      | 3/9 [01:58<03:53, 38.98s/it][A

[48, 'successful', '103 %', '€7,761', 'Out of €7,500', '11/28/2020', '52', 'theatre', 'jpg', 7, 25.0, 1000.0, 0, 13, 52, 1]



 44%|████▍     | 4/9 [02:36<03:14, 38.81s/it][A

[49, 'successful', '118 %', '€3,080', 'Out of €2,600', '11/9/2020', '61', 'theatre', 'jpg', 7, 10.0, 1500.0, 5, 12, 61, 6]



 56%|█████▌    | 5/9 [03:13<02:32, 38.14s/it][A

[50, 'successful', '101 %', '€3,030', 'Out of €3,000', '11/7/2020', '26', 'theatre', 'jpeg', 3, 10.0, 50.0, 1, 4, 26, 1]



 67%|██████▋   | 6/9 [03:48<01:51, 37.24s/it][A

[51, 'successful', '100 %', '€3,352', 'Out of €3,350', '10/31/2020', '32', 'theatre', 'jpg', 5, 20.0, 500.0, 0, 1, 33, 1]



 78%|███████▊  | 7/9 [04:24<01:13, 36.95s/it][A

[52, 'successful', '114 %', '€5,369', 'Out of €4,700', '10/3/2020', '103', 'theatre', 'jpg', 7, 5.0, 300.0, 0, 15, 104, 1]



 89%|████████▉ | 8/9 [05:01<00:37, 37.04s/it][A

[53, 'successful', '112 %', '€11,196', 'Out of €10,000', '9/8/2020', '60', 'theatre', 'jpg', 7, 10.0, 1000.0, 1, 16, 63, 1]



100%|██████████| 9/9 [05:37<00:00, 37.50s/it][A
 55%|█████▍    | 6/11 [36:18<30:17, 363.58s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

[54, 'successful', '100 %', '€4,005', 'Out of €4,000', '12/16/2020', '15', 'theatre', 'jpg', 9, 5.0, 600.0, 0, 3, 18, 1]



 11%|█         | 1/9 [00:41<05:34, 41.80s/it][A

[55, 'successful', '131 %', '€7,859', 'Out of €6,000', '12/16/2020', '40', 'theatre', 'jpeg', 7, 20.0, 500.0, 0, 6, 43, 1]



 22%|██▏       | 2/9 [01:18<04:41, 40.15s/it][A

[56, 'successful', '104 %', '€4,250', 'Out of €4,100', '12/16/2020', '58', 'theatre', 'png', 7, 5.0, 1000.0, 0, 10, 60, 1]



 33%|███▎      | 3/9 [01:55<03:55, 39.20s/it][A

[57, 'successful', '103 %', '€7,761', 'Out of €7,500', '11/28/2020', '52', 'theatre', 'jpg', 7, 25.0, 1000.0, 0, 13, 52, 1]



 44%|████▍     | 4/9 [02:31<03:12, 38.40s/it][A

[58, 'successful', '118 %', '€3,080', 'Out of €2,600', '11/9/2020', '61', 'theatre', 'jpg', 7, 10.0, 1500.0, 5, 12, 61, 6]



 56%|█████▌    | 5/9 [03:08<02:31, 37.85s/it][A

[59, 'successful', '101 %', '€3,030', 'Out of €3,000', '11/7/2020', '26', 'theatre', 'jpeg', 3, 10.0, 50.0, 1, 4, 26, 1]



 67%|██████▋   | 6/9 [03:43<01:51, 37.03s/it][A

[60, 'successful', '100 %', '€3,352', 'Out of €3,350', '10/31/2020', '32', 'theatre', 'jpg', 5, 20.0, 500.0, 0, 1, 33, 1]



 78%|███████▊  | 7/9 [04:19<01:13, 36.92s/it][A

[61, 'successful', '114 %', '€5,369', 'Out of €4,700', '10/3/2020', '103', 'theatre', 'jpg', 7, 5.0, 300.0, 0, 15, 104, 1]



 89%|████████▉ | 8/9 [04:55<00:36, 36.51s/it][A

[62, 'successful', '112 %', '€11,196', 'Out of €10,000', '9/8/2020', '60', 'theatre', 'jpg', 7, 10.0, 1000.0, 1, 16, 63, 1]



100%|██████████| 9/9 [05:31<00:00, 36.89s/it][A
 64%|██████▎   | 7/11 [42:13<24:03, 360.96s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

[63, 'successful', '101 %', '€6,066', 'Out of €6,000', '7/20/2019', '76', 'theatre', 'jpg', 8, 5.0, 1000.0, 1, 7, 82, 1]



 11%|█         | 1/9 [00:36<04:53, 36.72s/it][A

[64, 'successful', '100 %', '€6,019', 'Out of €6,000', '7/14/2019', '35', 'theatre', 'jpg', 8, 10.0, 2000.0, 0, 9, 37, 1]



 22%|██▏       | 2/9 [01:12<04:14, 36.36s/it][A

[65, 'successful', '100 %', '€6,820', 'Out of €6,800', '7/9/2019', '58', 'theatre', 'jpg', 12, 1.0, 6800.0, 1, 18, 64, 1]



 33%|███▎      | 3/9 [01:49<03:39, 36.59s/it][A

[66, 'successful', '100 %', '€5,009', 'Out of €5,000', '7/8/2019', '48', 'theatre', 'JPG', 9, 5.0, 600.0, 0, 6, 49, 1]



 44%|████▍     | 4/9 [02:26<03:04, 36.90s/it][A

[67, 'successful', '134 %', '€4,020', 'Out of €3,000', '7/8/2019', '38', 'theatre', 'jpg', 8, 5.0, 1000.0, 0, 4, 38, 1]



 56%|█████▌    | 5/9 [03:04<02:27, 36.95s/it][A

[68, 'successful', '101 %', '€5,070', 'Out of €5,000', '7/7/2019', '53', 'theatre', 'jpg', 7, 12.0, 500.0, 7, 14, 63, 1]



 67%|██████▋   | 6/9 [03:43<01:53, 37.76s/it][A

[69, 'successful', '162 %', '€8,101', 'Out of €5,000', '7/5/2019', '136', 'theatre', 'gif', 5, 10.0, 200.0, 0, 15, 136, 1]



 78%|███████▊  | 7/9 [04:20<01:15, 37.51s/it][A

[70, 'successful', '113 %', '€6,215', 'Out of €5,500', '7/5/2019', '46', 'theatre', 'jpg', 6, 20.0, 100.0, 7, 15, 46, 1]



 89%|████████▉ | 8/9 [04:56<00:37, 37.02s/it][A

[71, 'successful', '103 %', '€5,140', 'Out of €5,000', '6/27/2019', '80', 'theatre', 'jpg', 7, 10.0, 500.0, 6, 7, 83, 1]



100%|██████████| 9/9 [05:36<00:00, 37.40s/it][A
 73%|███████▎  | 8/11 [48:18<18:07, 362.44s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

[72, 'successful', '100 %', '€4,005', 'Out of €4,000', '12/16/2020', '15', 'theatre', 'jpg', 9, 5.0, 600.0, 0, 3, 18, 1]



 11%|█         | 1/9 [00:37<04:57, 37.14s/it][A

[73, 'successful', '131 %', '€7,859', 'Out of €6,000', '12/16/2020', '40', 'theatre', 'jpeg', 7, 20.0, 500.0, 0, 6, 43, 1]



 22%|██▏       | 2/9 [01:12<04:16, 36.62s/it][A

[74, 'successful', '104 %', '€4,250', 'Out of €4,100', '12/16/2020', '58', 'theatre', 'png', 7, 5.0, 1000.0, 0, 10, 60, 1]



 33%|███▎      | 3/9 [01:49<03:39, 36.58s/it][A

[75, 'successful', '103 %', '€7,761', 'Out of €7,500', '11/28/2020', '52', 'theatre', 'jpg', 7, 25.0, 1000.0, 0, 13, 52, 1]



 44%|████▍     | 4/9 [02:24<03:01, 36.25s/it][A

[76, 'successful', '118 %', '€3,080', 'Out of €2,600', '11/9/2020', '61', 'theatre', 'jpg', 7, 10.0, 1500.0, 5, 12, 61, 6]



 56%|█████▌    | 5/9 [03:01<02:25, 36.46s/it][A
 67%|██████▋   | 6/9 [03:35<01:47, 35.83s/it][A

[77, 'successful', '101 %', '€3,030', 'Out of €3,000', '11/7/2020', '26', 'theatre', 'jpeg', 3, 10.0, 50.0, 1, 4, 26, 1]
[78, 'successful', '100 %', '€3,352', 'Out of €3,350', '10/31/2020', '32', 'theatre', 'jpg', 5, 20.0, 500.0, 0, 1, 33, 1]



 78%|███████▊  | 7/9 [04:12<01:12, 36.14s/it][A

[79, 'successful', '114 %', '€5,369', 'Out of €4,700', '10/3/2020', '103', 'theatre', 'jpg', 7, 5.0, 300.0, 0, 15, 104, 1]



 89%|████████▉ | 8/9 [04:49<00:36, 36.34s/it][A

[80, 'successful', '112 %', '€11,196', 'Out of €10,000', '9/8/2020', '60', 'theatre', 'jpg', 7, 10.0, 1000.0, 1, 16, 63, 1]



100%|██████████| 9/9 [05:26<00:00, 36.29s/it][A
 82%|████████▏ | 9/11 [54:12<11:59, 359.80s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

[81, 'successful', '104 %', '€5,202', 'Out of €5,000', '6/13/2019', '107', 'theatre', 'jpg', 9, 5.0, 500.0, 2, 18, 115, 1]



 11%|█         | 1/9 [00:37<05:03, 37.92s/it][A

[82, 'successful', '101 %', '€5,050', 'Out of €5,000', '6/11/2019', '51', 'theatre', 'jpg', 5, 10.0, 500.0, 0, 3, 54, 1]



 22%|██▏       | 2/9 [01:13<04:20, 37.28s/it][A

[83, 'successful', '109 %', '€5,470', 'Out of €5,000', '6/9/2019', '81', 'theatre', 'png', 12, 10.0, 5000.0, 0, 13, 82, 1]



 33%|███▎      | 3/9 [01:50<03:42, 37.14s/it][A

[84, 'successful', '127 %', '€7,600', 'Out of €6,000', '6/7/2019', '81', 'theatre', 'jpg', 5, 10.0, 100.0, 14, 10, 82, 1]



 44%|████▍     | 4/9 [02:28<03:06, 37.25s/it][A

[85, 'successful', '128 %', '€6,954', 'Out of €5,454', '6/3/2019', '97', 'theatre', 'jpg', 9, 15.0, 2000.0, 9, 28, 101, 1]



 56%|█████▌    | 5/9 [03:04<02:28, 37.05s/it][A

[86, 'successful', '100 %', '€3,010', 'Out of €3,000', '5/24/2019', '51', 'theatre', 'jpg', 6, 5.0, 100.0, 0, 9, 55, 2]



 67%|██████▋   | 6/9 [03:40<01:50, 36.67s/it][A

[87, 'successful', '125 %', '€3,490', 'Out of €2,800', '5/20/2019', '68', 'theatre', 'jpg', 11, 5.0, 1500.0, 8, 7, 69, 2]



 78%|███████▊  | 7/9 [04:17<01:13, 36.67s/it][A

[88, 'successful', '104 %', '€3,110', 'Out of €3,000', '5/17/2019', '51', 'theatre', 'JPG', 9, 5.0, 1500.0, 0, 5, 51, 1]



 89%|████████▉ | 8/9 [04:52<00:36, 36.30s/it][A

[89, 'successful', '101 %', '€16,090', 'Out of €16,000', '5/15/2019', '259', 'theatre', 'png', 16, 5.0, 800.0, 1, 65, 274, 1]



100%|██████████| 9/9 [05:29<00:00, 36.61s/it][A
 91%|█████████ | 10/11 [1:00:08<05:58, 358.71s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

[90, 'successful', '105 %', '€3,135', 'Out of €3,000', '5/9/2019', '51', 'theatre', 'png', 4, 10.0, 500.0, 0, 20, 51, 1]



 11%|█         | 1/9 [00:37<04:58, 37.30s/it][A

[91, 'successful', '100 %', '€5,437', 'Out of €5,435', '5/8/2019', '62', 'theatre', 'png', 10, 5.0, 1000.0, 1, 10, 65, 1]



 22%|██▏       | 2/9 [01:15<04:22, 37.54s/it][A

[92, 'successful', '101 %', '€8,910', 'Out of €8,800', '4/30/2019', '67', 'theatre', 'jpg', 4, 10.0, 300.0, 0, 43, 84, 1]



 33%|███▎      | 3/9 [01:52<03:45, 37.50s/it][A

[93, 'successful', '108 %', '€3,785', 'Out of €3,500', '4/27/2019', '66', 'theatre', 'png', 10, 5.0, 350.0, 0, 8, 74, 5]



 44%|████▍     | 4/9 [02:31<03:09, 37.95s/it][A

[94, 'successful', '103 %', '€6,205', 'Out of €6,000', '4/17/2019', '40', 'theatre', 'jpg', 17, 10.0, 1050.0, 4, 17, 44, 1]



 56%|█████▌    | 5/9 [03:07<02:29, 37.29s/it][A

[95, 'successful', '101 %', '€5,060', 'Out of €5,000', '4/16/2019', '79', 'theatre', 'jpg', 5, 15.0, 120.0, 11, 5, 82, 1]



 67%|██████▋   | 6/9 [03:45<01:52, 37.38s/it][A

[96, 'successful', '109 %', '€3,825', 'Out of €3,500', '4/15/2019', '58', 'theatre', 'jpg', 5, 2.0, 250.0, 27, 17, 82, 1]



 78%|███████▊  | 7/9 [04:21<01:14, 37.08s/it][A

[97, 'successful', '103 %', '€3,085', 'Out of €3,000', '4/11/2019', '90', 'theatre', 'jpg', 7, 10.0, 1000.0, 3, 22, 91, 1]



 89%|████████▉ | 8/9 [04:58<00:36, 36.89s/it][A

[98, 'successful', '118 %', '€21,150', 'Out of €18,000', '4/8/2019', '164', 'theatre', 'png', 5, 20.0, 500.0, 2, 50, 181, 1]



100%|██████████| 9/9 [05:34<00:00, 37.17s/it][A
100%|██████████| 11/11 [1:06:13<00:00, 361.20s/it]


In [46]:
data_df = pd.DataFrame(data, columns = features_data)
data_df

Unnamed: 0,id,is_successful,percentage_fund,funded,target,in_2020,backers,category,thumbnail_type,num_rewards,min_price,max_price,num_news,num_comments,num_contributions,num_creator_projects
0,0,successful,100 %,"€4,005","Out of €4,000",12/16/2020,15,theatre,jpg,9,5.0,600.0,0,3,18,1
1,1,successful,131 %,"€7,859","Out of €6,000",12/16/2020,40,theatre,jpeg,7,20.0,500.0,0,6,43,1
2,2,successful,104 %,"€4,250","Out of €4,100",12/16/2020,58,theatre,png,7,5.0,1000.0,0,10,60,1
3,3,successful,103 %,"€7,761","Out of €7,500",11/28/2020,52,theatre,jpg,7,25.0,1000.0,0,13,52,1
4,4,successful,118 %,"€3,080","Out of €2,600",11/9/2020,61,theatre,jpg,7,10.0,1500.0,5,12,61,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,94,successful,103 %,"€6,205","Out of €6,000",4/17/2019,40,theatre,jpg,17,10.0,1050.0,4,17,44,1
95,95,successful,101 %,"€5,060","Out of €5,000",4/16/2019,79,theatre,jpg,5,15.0,120.0,11,5,82,1
96,96,successful,109 %,"€3,825","Out of €3,500",4/15/2019,58,theatre,jpg,5,2.0,250.0,27,17,82,1
97,97,successful,103 %,"€3,085","Out of €3,000",4/11/2019,90,theatre,jpg,7,10.0,1000.0,3,22,91,1


In [50]:
rewards_df = pd.DataFrame(rewards, columns = features_rewards)
rewards_df

Unnamed: 0,id,category,price,backers
0,0,theatre,5.0,0
1,0,theatre,10.0,4
2,0,theatre,20.0,3
3,0,theatre,30.0,1
4,0,theatre,50.0,1
...,...,...,...,...
722,98,theatre,20.0,64
723,98,theatre,50.0,25
724,98,theatre,100.0,17
725,98,theatre,200.0,2


In [51]:
data_df.to_csv('./data/data_theatre_1_11.csv', index=False)
rewards_df.to_csv('./data/rewards_theatre_1_11.csv', index=False)