### Import packages

In [3]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import re
from urllib.request import urlopen
from tqdm import tqdm
import random
import time
from selenium import webdriver

### Global variables

In [4]:
categories = {'sport': 'adventure-and-sport',
             'music': 'music',
             'film': 'film-and-video',
             'theatre': 'live-performance'}

### Functions

In [71]:
# Create an url
def create_url(page, category):
    _filter = '&categories[' + categories[category] + ']=on'
    return 'https://www.kisskissbankbank.com/en/discover?project[successful]=on' + _filter + '&page=' + str(page)

In [79]:
# Get a page from url
def get_page(url):
    ok = False
    page = requests.get(url)
    time.sleep(5)
    page = requests.get(url)
    if(page.status_code == 200):
        ok = True
    bs = BeautifulSoup(page.text, 'html.parser')
    return bs, ok

In [7]:
# Get the number of the projects from bs
def get_num_of_projects(bs):
    total_projects = bs.findAll('span', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-color-font1 k-u-size-tiny k-u-weight-regular')
    return int(total_projects[0].contents[0].split(' ')[0].replace(',','')) 

In [8]:
# Get the number of pages from bs
def get_num_of_pages(bs):
    num_pro = get_num_of_projects(bs)
    return int(num_pro/9) + 1

In [83]:
# Function to go to project page and get information
def go_to_project(url_pro):
    
    # Load the website by driver
    chrome_path = './chromedriver.exe'
    driver = webdriver.Chrome(chrome_path)
    driver.get(url_pro)
    time.sleep(5)
    
    # Accept the cookie
    cookie_question = driver.find_element_by_xpath("//*[@id=\"App-react-component\"]/div[1]/div/div/div/div/button[2]")
    cookie_question.click()
    
    # Load the html page
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')
    
    # Calculate num_news, num_comments, num_contributions
    infos = bs.findAll('a', 'k-NavBar__link')
    news_bs = infos[2].findAll('span', 'badge__StyledBadge-sc-7liuod-0 eucluz k-Badge k-Badge--spaced')
    comments_bs = infos[3].findAll('span', 'badge__StyledBadge-sc-7liuod-0 eucluz k-Badge k-Badge--spaced')
    contributions_bs = infos[4].findAll('span', 'badge__StyledBadge-sc-7liuod-0 eucluz k-Badge k-Badge--spaced')
    
    if(len(news_bs) == 0):
        num_news = 0
    else:
        num_news = int(news_bs[0].contents[0])

    if(len(comments_bs) == 0):
        num_comments = 0
    else:
        num_comments = int(comments_bs[0].contents[0])

    if(len(contributions_bs) == 0):
        num_contributions = 0
    else:
        num_contributions = int(contributions_bs[0].contents[0])
    
    # Calculate num_rewards, min_price, max_price, and backers each price
    rewards = bs.findAll('div','marger__StyledMarger-sc-1qqifp5-0 kXGmDY')
    prices = []
    backers = []
    for j in range(len(rewards)):
        rj = rewards[j]
        price_j = rj.findAll('h2', 'title__StyledTitle-sc-46lshq-0 edmigd k-RewardCard__title k-u-margin-bottom-double k-u-margin-top-none')
        if(len(price_j) == 0):
            break
        price_j = float(price_j[0].contents[0].replace('€','').replace(',',''))
        prices.append(price_j)
        backers_j = rj.findAll('span', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-color-font1 k-u-size-micro k-u-weight-regular k-RewardCard__infos k-RewardCard__infos--hasBottomMargin k-RewardCard__infos--disabled')
        bj = 0
        if(len(backers_j) != 0 and backers_j[0].contents[0].replace(' ','').isnumeric()):
            bj = int(backers_j[0].contents[0])
        backers.append(bj)
    prices_np = np.array(prices)
    backers_np = np.array(backers)
    min_price = np.min(prices_np)
    max_price = np.max(prices_np)
    num_rewards = len(prices_np)
    #print(backers_np)
    
    # Calculate num_creator_projects
    url_owner = bs.findAll('a', 'owner-info__StyledOwnerGrid-tqxc8c-0 jnrkmW')[0].get('href')
    driver_owner = webdriver.Chrome(chrome_path)
    driver_owner.get('https://www.kisskissbankbank.com'+ url_owner)
    time.sleep(5)
    cookie_question_owner = driver_owner.find_element_by_xpath('//*[@id="Header-react-component"]/div[1]/div/div/div/div/button[2]')
    cookie_question_owner.click()
    html_owner = driver_owner.page_source
    bs_owner = BeautifulSoup(html_owner, 'html.parser')
    num_creator_projects = int(bs_owner.findAll('span', 'k-Badge k-HorizontalNav__badge')[0].contents[0])
    #print(num_creator_projects)
    
    driver_owner.quit()
    driver.quit()
    return num_news, num_comments, num_contributions, num_rewards, min_price, max_price, prices_np, backers_np, num_creator_projects

In [110]:
# Function to collect all features
def collect_data(start = 1, end = 2, category = 'music'):
    samples = []
    reward_prices = []
    count = (start - 1)*9
    for i in tqdm(range(start,end)):
        url_i = create_url(i,category)
        #print(url_i)
        #bs_i, ok = get_page(url_i)
        # Load the website by driver
        chrome_path = './chromedriver.exe'
        driver_i = webdriver.Chrome(chrome_path)
        driver_i.get(url_i)
        time.sleep(5)

        # Accept the cookie
        cookie_question_i = driver_i.find_element_by_xpath("//*[@id=\"App-react-component\"]/div[1]/div/div/div/div/button[2]")
        cookie_question_i.click()

        # Load the html page
        html_i = driver_i.page_source
        bs_i = BeautifulSoup(html_i, 'html.parser')
        
        projects = bs_i.findAll('a', 'styles__StyledCrowdfundingCard-sc-1dxuhb7-0 dOcwdr k-CrowdfundingCard k-Card k-Card--light k-Card--withoutBoxShadowOnHover k-CrowdfundingCard--titlesMinHeight')
        for p in tqdm(projects):
            # Extract is_successful
            is_successful = p.findAll('span','state-badge__StyledState-sc-1ixvpgd-0 efdutF')[0].contents[1]

            # Extract percentage_fund
            percentage_fund = p.findAll('span', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-color-font1 k-u-size-micro k-u-weight-regular k-CrowdfundingCard__progressBar__percent k-u-hidden@s-down')[0].contents[0]

            # Extract funded, target, in_2020, backers
            infos = p.findAll('div', 'k-CrowdfundingCard__informations__infoContainer')
            funded = infos[2].findAll('strong', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-weight-regular info__StyledText-h7ofxb-0 jfWZBp')[0].contents[0]
            target = infos[2].findAll('span', 'k-u-hidden@s-down')[0].contents[0]
            in_2020 = infos[1].findAll('strong', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-weight-regular info__StyledText-h7ofxb-0 jfWZBp')[0].contents[0]
            backers = p.findAll('strong', 'text__StyledText-sc-1jqe2sw-0 kKHxol k-u-weight-regular info__StyledText-h7ofxb-0 jfWZBp')[0].contents[0]

            # Extract category

            # Extract thumbnail type
            img = p.findAll('div','k-CrowdfundingCard__image__imageContainer')[0]
            thumbnail_type = img.find('img').get('src').split('.')[-1]

            # Extract remaining features
            url_pro = p.get('href')
            num_news, num_comments, num_contributions, num_rewards, min_price, max_price, prices_np, backers_np, num_creator_projects = go_to_project(url_pro)

            print(num_news, num_comments, num_contributions,num_rewards, min_price, max_price)
            #print(backers_np)
            #print(prices_np)
            #print(num_creator_projects)
            #print('----------------------------------')
            sample_i = [
                count,
                is_successful,
                percentage_fund,
                funded,
                target,
                in_2020,
                backers,
                category,
                thumbnail_type,
                num_rewards,
                min_price,
                max_price,
                num_news,
                num_comments,
                num_contributions,
                num_creator_projects
            ]
            for j in range(len(prices_np)):
                rp = [count, category, prices_np[j], backers_np[j]]
                reward_prices.append(rp)
            samples.append(sample_i)
            count += 1
            time.sleep(random.random()*2)
        driver_i.quit()
        time.sleep(random.random()*5)
    return samples, reward_prices


### Tests

In [72]:
url = create_url(2,'music')
bs, ok = get_page(url)
url, ok

('https://www.kisskissbankbank.com/en/discover?project[successful]=on&categories[music]=on&page=2',
 True)

In [11]:
get_num_of_projects(bs)

5679

In [12]:
num_pages = get_num_of_pages(bs)
num_pages

632

### Main

In [60]:
features_data = [
    'id',
    'is_successful',
    'percentage_fund',
    'funded',
    'target',
    'in_2020',
    'backers',
    'category',
    'thumbnail_type',
    'num_rewards',
    'min_price',
    'max_price',
    'num_news',
    'num_comments',
    'num_contributions',
    'num_creator_projects'
]
features_rewards = [
    'id',
    'category',
    'price',
    'backers'
]

In [111]:
data, rewards = collect_data(10,25,'music')

  0%|          | 0/15 [00:00<?, ?it/s]
  0%|          | 0/9 [00:00<?, ?it/s][A

1 31 97 12 2.0 5000.0



 11%|█         | 1/9 [00:44<05:52, 44.12s/it][A

2 23 86 4 30.0 200.0



 22%|██▏       | 2/9 [01:20<04:53, 41.94s/it][A

0 25 126 9 6.0 800.0



 33%|███▎      | 3/9 [01:58<04:02, 40.47s/it][A

0 14 79 7 5.0 200.0



 44%|████▍     | 4/9 [02:34<03:16, 39.29s/it][A

2 26 86 12 20.0 1750.0



 56%|█████▌    | 5/9 [03:11<02:34, 38.62s/it][A

2 18 89 10 10.0 800.0



 67%|██████▋   | 6/9 [03:50<01:56, 38.77s/it][A

0 18 97 8 15.0 1000.0



 78%|███████▊  | 7/9 [04:29<01:17, 38.69s/it][A

13 52 153 13 1.0 1000.0



 89%|████████▉ | 8/9 [05:06<00:38, 38.11s/it][A
100%|██████████| 9/9 [05:41<00:00, 37.89s/it][A

0 43 174 5 20.0 500.0



  7%|▋         | 1/15 [06:00<1:24:07, 360.52s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

0 39 209 9 5.0 80.0



 11%|█         | 1/9 [00:39<05:16, 39.52s/it][A

4 33 65 5 20.0 800.0



 22%|██▏       | 2/9 [01:17<04:33, 39.06s/it][A

1 32 105 9 20.0 1000.0



 33%|███▎      | 3/9 [01:53<03:48, 38.14s/it][A

4 23 104 5 15.0 180.0



 44%|████▍     | 4/9 [02:30<03:09, 37.83s/it][A

2 25 118 8 20.0 3000.0



 56%|█████▌    | 5/9 [03:09<02:33, 38.28s/it][A

2 10 103 3 20.0 60.0



 67%|██████▋   | 6/9 [03:44<01:51, 37.25s/it][A

3 13 124 7 5.0 100.0



 78%|███████▊  | 7/9 [04:36<01:23, 41.65s/it][A

0 12 81 9 12.0 1500.0



 89%|████████▉ | 8/9 [05:13<00:40, 40.28s/it][A

0 20 103 3 15.0 40.0



100%|██████████| 9/9 [05:50<00:00, 38.93s/it][A
 13%|█▎        | 2/15 [12:19<1:19:18, 366.07s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

1 5 24 6 10.0 300.0



 11%|█         | 1/9 [00:36<04:52, 36.54s/it][A

8 6 72 9 5.0 1000.0



 22%|██▏       | 2/9 [01:13<04:17, 36.74s/it][A

2 7 53 10 5.0 1250.0



 33%|███▎      | 3/9 [01:49<03:39, 36.54s/it][A

0 11 92 9 10.0 500.0



 44%|████▍     | 4/9 [02:26<03:03, 36.73s/it][A

10 22 92 18 12.0 2000.0



 56%|█████▌    | 5/9 [03:08<02:32, 38.10s/it][A

1 10 80 9 10.0 1000.0



 67%|██████▋   | 6/9 [03:44<01:52, 37.49s/it][A

6 8 41 6 20.0 500.0



 78%|███████▊  | 7/9 [04:21<01:14, 37.39s/it][A

1 15 104 9 5.0 600.0



 89%|████████▉ | 8/9 [04:57<00:37, 37.02s/it][A

2 10 93 9 5.0 1000.0



100%|██████████| 9/9 [05:35<00:00, 37.24s/it][A
 20%|██        | 3/15 [18:12<1:12:27, 362.27s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

0 18 133 8 15.0 600.0



 11%|█         | 1/9 [00:37<05:01, 37.65s/it][A

2 20 87 4 10.0 120.0



 22%|██▏       | 2/9 [01:14<04:20, 37.26s/it][A

0 3 18 9 5.0 600.0



 33%|███▎      | 3/9 [01:51<03:43, 37.22s/it][A

2 46 169 12 20.0 800.0



 44%|████▍     | 4/9 [02:29<03:07, 37.42s/it][A

7 8 39 10 5.0 550.0



 56%|█████▌    | 5/9 [03:05<02:29, 37.27s/it][A

4 41 560 11 10.0 450.0



 67%|██████▋   | 6/9 [03:42<01:51, 37.10s/it][A

0 19 100 14 5.0 500.0



 78%|███████▊  | 7/9 [04:21<01:15, 37.58s/it][A

1 11 32 5 5.0 600.0



 89%|████████▉ | 8/9 [04:58<00:37, 37.39s/it][A

3 2 34 5 1.0 100.0



100%|██████████| 9/9 [05:34<00:00, 37.20s/it][A
 27%|██▋       | 4/15 [24:06<1:05:55, 359.61s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

2 4 38 6 10.0 250.0



 11%|█         | 1/9 [00:38<05:11, 38.96s/it][A

0 11 63 7 5.0 500.0



 22%|██▏       | 2/9 [01:16<04:28, 38.39s/it][A

6 12 79 5 10.0 100.0



 33%|███▎      | 3/9 [01:52<03:47, 37.94s/it][A

2 11 87 10 10.0 600.0



 44%|████▍     | 4/9 [02:29<03:08, 37.63s/it][A

0 2 41 11 10.0 10000.0



 56%|█████▌    | 5/9 [03:07<02:30, 37.61s/it][A

23 15 110 10 5.0 250.0



 67%|██████▋   | 6/9 [03:43<01:51, 37.12s/it][A

7 15 83 5 10.0 200.0



 78%|███████▊  | 7/9 [04:19<01:13, 36.85s/it][A


2 54 243 7 5.0 150.0


 89%|████████▉ | 8/9 [04:54<00:36, 36.37s/it][A

0 40 238 13 5.0 650.0



100%|██████████| 9/9 [05:32<00:00, 36.91s/it][A
 33%|███▎      | 5/15 [30:05<59:54, 359.48s/it]  
  0%|          | 0/9 [00:00<?, ?it/s][A

9 12 90 15 5.0 750.0



 11%|█         | 1/9 [00:38<05:04, 38.06s/it][A

11 47 213 5 15.0 100.0



 22%|██▏       | 2/9 [01:15<04:24, 37.74s/it][A

1 19 106 8 8.0 80.0



 33%|███▎      | 3/9 [01:52<03:46, 37.71s/it][A

4 24 186 11 9.0 1200.0



 44%|████▍     | 4/9 [02:29<03:07, 37.41s/it][A

23 41 398 36 12.0 5000.0



 56%|█████▌    | 5/9 [03:07<02:30, 37.70s/it][A

6 20 109 7 7.0 300.0



 67%|██████▋   | 6/9 [03:43<01:51, 37.12s/it][A

3 35 178 7 15.0 1000.0



 78%|███████▊  | 7/9 [04:20<01:14, 37.21s/it][A

2 21 73 9 10.0 1000.0



 89%|████████▉ | 8/9 [04:57<00:36, 36.96s/it][A

0 32 116 8 10.0 1000.0



100%|██████████| 9/9 [05:34<00:00, 37.20s/it][A
 40%|████      | 6/15 [36:00<53:42, 358.02s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A
 11%|█         | 1/9 [00:35<04:43, 35.45s/it][A

2 17 121 6 10.0 500.0
0 7 61 7 5.0 1000.0



 22%|██▏       | 2/9 [01:13<04:13, 36.18s/it][A

0 22 76 6 15.0 200.0



 33%|███▎      | 3/9 [01:48<03:35, 35.92s/it][A

0 21 157 7 10.0 3000.0



 44%|████▍     | 4/9 [02:25<03:00, 36.15s/it][A

2 22 161 18 5.0 15000.0



 56%|█████▌    | 5/9 [03:03<02:27, 36.76s/it][A

0 12 72 5 5.0 140.0



 67%|██████▋   | 6/9 [03:40<01:50, 36.68s/it][A
 78%|███████▊  | 7/9 [04:16<01:12, 36.47s/it][A

0 21 67 6 10.0 1000.0
0 32 158 5 20.0 1000.0



 89%|████████▉ | 8/9 [05:04<00:39, 39.93s/it][A

0 21 99 6 2.0 100.0



100%|██████████| 9/9 [05:40<00:00, 37.86s/it][A
 47%|████▋     | 7/15 [42:00<47:49, 358.64s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

9 17 108 4 25.0 100.0



 11%|█         | 1/9 [00:37<04:56, 37.11s/it][A

7 6 50 9 10.0 400.0



 22%|██▏       | 2/9 [01:14<04:19, 37.13s/it][A

0 25 140 9 5.0 850.0



 33%|███▎      | 3/9 [01:49<03:39, 36.57s/it][A

2 13 49 7 20.0 300.0



 44%|████▍     | 4/9 [02:27<03:04, 36.90s/it][A
 56%|█████▌    | 5/9 [03:02<02:25, 36.44s/it][A

0 13 80 9 10.0 160.0
4 17 70 9 10.0 1000.0



 67%|██████▋   | 6/9 [03:40<01:50, 36.93s/it][A

0 10 45 8 5.0 1000.0



 78%|███████▊  | 7/9 [04:18<01:14, 37.11s/it][A

0 11 85 9 15.0 1750.0



 89%|████████▉ | 8/9 [04:53<00:36, 36.69s/it][A

34 32 101 11 5.0 150.0



100%|██████████| 9/9 [05:30<00:00, 36.70s/it][A
 53%|█████▎    | 8/15 [47:49<41:31, 355.94s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

0 18 133 8 15.0 600.0



 11%|█         | 1/9 [00:37<05:03, 37.98s/it][A
 22%|██▏       | 2/9 [01:12<04:19, 37.05s/it][A

2 20 87 4 10.0 120.0
0 3 18 9 5.0 600.0



 33%|███▎      | 3/9 [01:49<03:41, 36.93s/it][A

2 46 169 12 20.0 800.0



 44%|████▍     | 4/9 [02:28<03:08, 37.67s/it][A

7 8 39 10 5.0 550.0



 56%|█████▌    | 5/9 [03:05<02:29, 37.43s/it][A

4 41 560 11 10.0 450.0



 67%|██████▋   | 6/9 [03:44<01:53, 37.76s/it][A

0 19 100 14 5.0 500.0



 78%|███████▊  | 7/9 [04:34<01:23, 41.57s/it][A

1 11 32 5 5.0 600.0



 89%|████████▉ | 8/9 [05:09<00:39, 39.55s/it][A

3 2 34 5 1.0 100.0



100%|██████████| 9/9 [05:44<00:00, 38.29s/it][A
 60%|██████    | 9/15 [53:52<35:47, 357.91s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A
 11%|█         | 1/9 [00:37<05:03, 37.98s/it][A

0 6 82 6 10.0 150.0
1 19 78 9 5.0 650.0



 22%|██▏       | 2/9 [01:15<04:24, 37.72s/it][A

21 41 209 13 5.0 1800.0



 33%|███▎      | 3/9 [01:53<03:47, 37.97s/it][A

0 12 67 6 15.0 500.0



 44%|████▍     | 4/9 [02:32<03:11, 38.36s/it][A

4 22 93 11 5.0 2000.0



 56%|█████▌    | 5/9 [03:09<02:31, 37.96s/it][A

0 13 163 7 5.0 100.0



 67%|██████▋   | 6/9 [03:46<01:52, 37.44s/it][A

0 25 210 10 10.0 3000.0



 78%|███████▊  | 7/9 [04:24<01:15, 37.83s/it][A

0 12 60 17 10.0 300.0



 89%|████████▉ | 8/9 [05:02<00:37, 37.62s/it][A

10 33 133 11 10.0 2000.0



100%|██████████| 9/9 [05:39<00:00, 37.74s/it][A
 67%|██████▋   | 10/15 [59:49<29:48, 357.78s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A
 11%|█         | 1/9 [00:36<04:52, 36.60s/it][A

0 18 133 8 15.0 600.0
2 20 87 4 10.0 120.0



 22%|██▏       | 2/9 [01:13<04:16, 36.59s/it][A

0 3 18 9 5.0 600.0



 33%|███▎      | 3/9 [01:49<03:39, 36.50s/it][A

2 46 169 12 20.0 800.0



 44%|████▍     | 4/9 [02:25<03:01, 36.32s/it][A

7 8 39 10 5.0 550.0



 56%|█████▌    | 5/9 [03:01<02:25, 36.27s/it][A

4 41 560 11 10.0 450.0



 67%|██████▋   | 6/9 [03:38<01:49, 36.50s/it][A

0 19 100 14 5.0 500.0



 78%|███████▊  | 7/9 [04:17<01:14, 37.35s/it][A

1 11 32 5 5.0 600.0



 89%|████████▉ | 8/9 [04:55<00:37, 37.35s/it][A

3 2 34 5 1.0 100.0



100%|██████████| 9/9 [05:31<00:00, 36.84s/it][A
 73%|███████▎  | 11/15 [1:05:41<23:43, 355.87s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

0 7 84 6 5.0 700.0



 11%|█         | 1/9 [00:36<04:48, 36.11s/it][A

0 4 40 3 5.0 50.0



 22%|██▏       | 2/9 [01:12<04:14, 36.33s/it][A

0 17 111 13 5.0 100.0



 33%|███▎      | 3/9 [01:50<03:40, 36.77s/it][A
 44%|████▍     | 4/9 [02:31<03:09, 37.99s/it][A

11 21 131 13 5.0 400.0



 56%|█████▌    | 5/9 [03:07<02:29, 37.33s/it][A

13 5 63 8 5.0 1450.0
2 16 123 17 5.0 1000.0



 67%|██████▋   | 6/9 [03:43<01:50, 36.88s/it][A

0 8 65 17 15.0 3000.0



 78%|███████▊  | 7/9 [04:22<01:14, 37.47s/it][A

4 17 150 7 10.0 600.0



 89%|████████▉ | 8/9 [04:58<00:37, 37.12s/it][A

1 11 86 11 5.0 1200.0



100%|██████████| 9/9 [05:36<00:00, 37.39s/it][A
 80%|████████  | 12/15 [1:11:37<17:48, 356.05s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

4 17 150 7 10.0 600.0



 11%|█         | 1/9 [00:37<04:58, 37.27s/it][A

1 11 86 11 5.0 1200.0



 22%|██▏       | 2/9 [01:13<04:19, 37.04s/it][A

0 23 70 6 20.0 1500.0



 33%|███▎      | 3/9 [01:50<03:42, 37.03s/it][A

0 9 79 8 10.0 1000.0



 44%|████▍     | 4/9 [02:26<03:03, 36.63s/it][A
 56%|█████▌    | 5/9 [03:02<02:26, 36.58s/it][A

3 29 214 7 10.0 500.0
0 15 96 9 7.0 300.0



 67%|██████▋   | 6/9 [03:38<01:49, 36.40s/it][A

1 19 144 8 8.0 500.0



 78%|███████▊  | 7/9 [04:15<01:13, 36.58s/it][A

1 21 105 7 5.0 500.0



 89%|████████▉ | 8/9 [04:53<00:37, 37.00s/it][A

1 10 69 10 1.0 100.0



100%|██████████| 9/9 [05:51<00:00, 39.07s/it][A
 87%|████████▋ | 13/15 [1:17:55<12:04, 362.45s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

1 8 67 7 5.0 300.0



 11%|█         | 1/9 [00:37<05:00, 37.54s/it][A

7 21 103 8 5.0 500.0



 22%|██▏       | 2/9 [01:13<04:20, 37.16s/it][A

0 1 7 7 5.0 1000.0



 33%|███▎      | 3/9 [01:50<03:42, 37.04s/it][A

0 13 62 12 5.0 500.0



 44%|████▍     | 4/9 [02:26<03:03, 36.74s/it][A

0 13 103 5 18.0 100.0



 56%|█████▌    | 5/9 [03:03<02:27, 36.77s/it][A

0 13 70 7 5.0 1000.0



 67%|██████▋   | 6/9 [03:40<01:50, 36.96s/it][A

1 9 75 6 10.0 1000.0



 78%|███████▊  | 7/9 [04:17<01:13, 36.77s/it][A

2 16 80 12 5.0 10000.0



 89%|████████▉ | 8/9 [04:58<00:38, 38.07s/it][A

4 21 97 3 20.0 65.0



100%|██████████| 9/9 [05:34<00:00, 37.21s/it][A
 93%|█████████▎| 14/15 [1:23:51<06:00, 360.72s/it]
  0%|          | 0/9 [00:00<?, ?it/s][A

3 4 37 15 5.0 400.0



 11%|█         | 1/9 [00:47<06:19, 47.45s/it][A

0 7 42 8 5.0 1000.0



 22%|██▏       | 2/9 [01:26<05:14, 44.89s/it][A

0 28 185 7 10.0 500.0



 33%|███▎      | 3/9 [02:03<04:15, 42.51s/it][A
 44%|████▍     | 4/9 [02:38<03:21, 40.27s/it][A

2 4 66 7 10.0 500.0
0 8 93 8 10.0 500.0



 56%|█████▌    | 5/9 [03:15<02:37, 39.34s/it][A

3 23 88 13 5.0 1000.0



 67%|██████▋   | 6/9 [03:53<01:57, 39.06s/it][A

0 4 40 6 3.0 500.0



 78%|███████▊  | 7/9 [04:32<01:17, 38.94s/it][A

0 5 26 12 5.0 1000.0



 89%|████████▉ | 8/9 [05:08<00:37, 37.96s/it][A

1 18 58 6 20.0 900.0



100%|██████████| 9/9 [05:45<00:00, 38.34s/it][A
100%|██████████| 15/15 [1:30:03<00:00, 360.24s/it]


In [112]:
data_df = pd.DataFrame(data, columns = features_data)
data_df

Unnamed: 0,id,is_successful,percentage_fund,funded,target,in_2020,backers,category,thumbnail_type,num_rewards,min_price,max_price,num_news,num_comments,num_contributions,num_creator_projects
0,81,successful,114 %,"€5,679","Out of €5,000",8/5/2020,92,music,png,12,2.0,5000.0,1,31,97,1
1,82,successful,100 %,"€9,000","Out of €9,000",8/4/2020,80,music,jpg,4,30.0,200.0,2,23,86,1
2,83,successful,125 %,"€3,124","Out of €2,500",8/4/2020,124,music,jpg,9,6.0,800.0,0,25,126,1
3,84,successful,103 %,"€4,115","Out of €4,000",8/2/2020,73,music,jpg,7,5.0,200.0,0,14,79,1
4,85,successful,101 %,"€9,089","Out of €9,000",8/1/2020,76,music,JPG,12,20.0,1750.0,2,26,86,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,211,successful,111 %,"€4,440","Out of €4,000",2/17/2020,88,music,png,8,10.0,500.0,0,8,93,5
131,212,successful,113 %,"€4,520","Out of €4,000",2/16/2020,86,music,jpg,13,5.0,1000.0,3,23,88,1
132,213,successful,103 %,"€3,618","Out of €3,500",2/10/2020,37,music,png,6,3.0,500.0,0,4,40,1
133,214,successful,100 %,"€3,000","Out of €3,000",2/7/2020,20,music,png,12,5.0,1000.0,0,5,26,1


In [113]:
rewards_df_ = pd.DataFrame(rewards, columns = features_rewards)
rewards_df

Unnamed: 0,id,category,price,backers
0,0,music,15.0,25
1,0,music,25.0,42
2,0,music,35.0,25
3,0,music,55.0,19
4,0,music,80.0,5
...,...,...,...,...
877,98,music,1.0,1
878,98,music,10.0,6
879,98,music,30.0,8
880,98,music,50.0,1


In [114]:
data_df.to_csv('./data/data_music_2.csv', index=False)
rewards_df.to_csv('./data/rewards_music_2.csv', index=False)