# Imports 

In [1]:
from math import ceil
from time import sleep
from random import randint
from bs4 import BeautifulSoup


from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

import pandas as pd
import numpy as np
import requests
import json
import pickle

# SPSS Fitting

## Macro

In [2]:
# Restore the previously scraped campaign objects from disk.
# NOTE: pickle executes arbitrary code on load -- only open files you produced yourself.
with open('data/filled_selected_campaigns.pkl', mode='rb') as pickle_file:
    projects = pickle.load(pickle_file)

In [3]:
# Keep only campaigns whose earliest donation date does not render as '1970-01-01'.
# NOTE(review): presumably the Unix-epoch placeholder for a missing date -- confirm.
projects = [
    project
    for project in projects
    if str(min(donation.date for donation in project.donations)) != '1970-01-01'
]

In [4]:
# Attach the per-campaign summary attributes consumed by the macro table.
for p in projects:
    # Start date: the earliest recorded event (actuality, donation or
    # comment), but never later than 60 days before the end date.
    event_dates = [e.date for e in p.actualities + p.donations] + p.comments
    p.start_date = min(event_dates + [p.end_date - pd.to_timedelta(60, unit='D')])
    p.duration = (p.end_date - p.start_date).days

    # DONATIONS
    p.num_don = len(p.donations)
    # Zero-amount donations are excluded from the mean.
    amounts = [don.amount for don in p.donations if don.amount != 0]
    # Guard against campaigns where every donation amount is 0, which would
    # otherwise raise ZeroDivisionError and abort the whole loop.
    p.mean_don = sum(amounts) / len(amounts) if amounts else 0.0

    # Number of donations strictly above 1.
    p.jump_1 = sum(1 for don in p.donations if don.amount > 1)
    # Number of donations strictly above the campaign's mean donation.
    p.jump_mean = sum(1 for don in p.donations if don.amount > p.mean_don)
    # Number of donations reaching at least p.hrp.
    # NOTE(review): presumably hrp is the highest reward price -- confirm.
    p.max_rewards = sum(1 for don in p.donations if don.amount >= p.hrp)

In [5]:
# One flat record per campaign; the field order must match the column names
# given to the DataFrame built from this list.
campaigns = [
    [
        proj.link,
        proj.title,
        proj.description,
        proj.project_holder,
        proj.current_amount,
        proj.aimed_amount,
        proj.nb_contrib,
        proj.completion_rate,
        proj.categories,
        len(proj.comments),
        proj.num_pers,
        proj.num_info,
        proj.end_date,
        proj.num_don,
        proj.mean_don,
        proj.jump_1,
        proj.jump_mean,
        proj.max_rewards,
        proj.other_projects,
        proj.start_date,
        proj.duration,
    ]
    for proj in projects
]

In [6]:
# Column labels, in the same order as the fields of each `campaigns` record.
cols = [
    'link',
    'title',
    'desc',
    'project_holder',
    'Amount',
    'aimed_amount',
    'nb_contrib',
    'ComplRate',
    'categories',
    'NComment',
    'NPers',
    'NInfo',
    'end_date',
    'NDon',
    'ADon',
    'NJump1',
    'NJumpA',
    'NDonLevel',
    'OtherProject',
    'start_date',
    'Duration',
]
df = pd.DataFrame(data=campaigns, columns=cols)

In [7]:
# Total number of actualities: 'pers' plus 'info' counts.
df.loc[:, 'nb_actus'] = df['NPers'].add(df['NInfo'])
# Flag campaigns whose completion rate is strictly above 105.
df.loc[:, 'OverSuccess'] = df['ComplRate'].gt(105)

In [8]:
# Collect every distinct category label found in the comma-separated
# `categories` strings.
unique_labels = set()
for joined_labels in set(df.categories):
    unique_labels.update(joined_labels.split(', '))
cats = list(unique_labels)

# Category labels treated as cultural.
cultura = [
    "Photo d'art",
    'Enregistrement',
    'Art numérique',
    'BD',
    'Long-métrage',
    'Documentaire',
    'Films & vidéo',
    'Danse',
    'Musique',
    'Clip',
    'Art & photo',
    'Peinture',
    'Websérie',
    'Roman',
    'Livres',
    'Court-métrage',
    'Illustration',
    'Théâtre et danse',
    'Photo-reportage',
    'Reportage',
    'Théâtre',
    'Street art',
    'Webdocumentaire',
    "Livre d'art",
]

# A campaign is flagged as cultural when its category string contains any of
# the labels above (the alternation is matched as a regular expression).
culture_pattern = '|'.join(cultura)
df.loc[:, 'BCulture'] = df['categories'].str.contains(culture_pattern)

In [9]:
# Store the flag columns as integers instead of booleans.
df = df.astype({flag: 'int' for flag in ('BCulture', 'OtherProject', 'OverSuccess')})

In [10]:
# Columns exported to the macro table, in output order.
macro_columns = [
    'link',
    'Amount',
    'ComplRate',
    'OverSuccess',
    'Duration',
    'NDon',
    'ADon',
    'NInfo',
    'NPers',
    'NComment',
    'NJumpA',
    'NJump1',
    'NDonLevel',
    'BCulture',
    'OtherProject',
]
macro_df = df[macro_columns]

In [12]:
# Spreadsheet merged into the macro table on 'link'.
# NOTE(review): absolute, machine-specific path -- this cell only runs on the
# original author's computer.
fb_df = pd.read_excel(io='C:/Users/tangu/Downloads/Macro_Final_E_PourMerge.xls')

In [13]:
# Left-join the spreadsheet columns onto the macro table by campaign link,
# then replace every missing value with 5.
# NOTE(review): fillna(5) applies to ALL columns of the merged frame, not only
# the ones coming from fb_df -- confirm that 5 is the intended filler everywhere.
macro_df = pd.merge(macro_df, fb_df, how='left', on='link').fillna(5)

In [14]:
# Cast the flag/count columns (including the merged Facebook ones) to integers.
integer_columns = ['BCulture', 'OtherProject', 'OverSuccess', 'Facebook', 'NFacebook']
macro_df = macro_df.astype(dict.fromkeys(integer_columns, 'int'))

In [15]:
# Write the macro table as a semicolon-separated file (index included).
macro_df.to_csv(path_or_buf='data/output/macro.csv', sep=';')

## Micro

In [45]:
def _empty_day():
    """Return a fresh per-day bucket holding one empty list per event kind."""
    return {'pers': [], 'inf': [], 'don': [], 'com': []}


# Build the micro (campaign x day) panel: one row per campaign day. Columns
# suffixed "J" describe the current day, columns suffixed "J-1" carry the
# value computed on the previous day (initial value on day 1).
project_list = []
no_events = _empty_day()  # shared, never mutated: used for days without events

for p in projects:
    # First day of the window; advanced by one day after each row.
    current_date = p.end_date - pd.to_timedelta(p.duration, unit='d')

    # Group all events by date in a single pass instead of rescanning every
    # event list once per distinct date.
    event_by_date = {}
    for a in p.actualities:
        if a.kind == 'pers':
            event_by_date.setdefault(a.date, _empty_day())['pers'].append(a)
        if a.kind == 'inf':
            event_by_date.setdefault(a.date, _empty_day())['inf'].append(a)
    for don in p.donations:
        event_by_date.setdefault(don.date, _empty_day())['don'].append(don)
    # Entries of p.comments are compared directly with dates, so each entry is
    # used as its own grouping key.
    for com in p.comments:
        event_by_date.setdefault(com, _empty_day())['com'].append(com)

    # Donations with amount 0 are given the campaign's mean donation
    # (this mutates the donation objects in place).
    for don in p.donations:
        if don.amount == 0:
            don.amount = p.mean_don

    # Running totals over the campaign.
    p_amount = 0
    nb_don = 0

    # Previous-day state, emitted in the "J-1" columns.
    mdon = False
    minfo = False
    mpers = False

    mcom = False
    mos = False
    mcr = 0
    mar = 0

    mnjump1 = 0
    mnjumpa = 0

    for i in range(1, p.duration + 1):
        day_data = event_by_date.get(current_date, no_events)
        day_amounts = [don.amount for don in day_data['don']]

        nb_don += len(day_data['don'])
        p_amount += sum(day_amounts)

        # Key insertion order defines the column order of the final frame.
        row = {'link': p.link, 'day': i}

        row['DonJ'] = bool(day_data['don'])
        row['DonJ-1'] = mdon
        mdon = row['DonJ']

        row['InfoJ'] = bool(day_data['inf'])
        row['InfoJ-1'] = minfo
        minfo = row['InfoJ']

        row['PersJ'] = bool(day_data['pers'])
        row['PersJ-1'] = mpers
        mpers = row['PersJ']

        # Cumulative number of donations divided by the number of elapsed days.
        row['AttractRelJ-1'] = mar
        mar = nb_don / i

        row['CommentJ-1'] = mcom
        mcom = bool(day_data['com'])

        # Donations of the day strictly above 1.
        row['NJump1J-1'] = mnjump1
        mnjump1 = sum(1 for amount in day_amounts if amount > 1)
        row['Jump1J'] = mnjump1 != 0

        # Donations of the day strictly above the campaign's mean donation.
        row['NJumpAJ-1'] = mnjumpa
        mnjumpa = sum(1 for amount in day_amounts if amount > p.mean_don)
        row['JumpAJ'] = mnjumpa != 0

        # Completion rate in percent, from the cumulative amount scaled by 1.15.
        # NOTE(review): the meaning of the 1.15 factor is not visible here -- confirm.
        row['ComplRateJ-1'] = mcr
        mcr = int(p_amount * 1.15 / p.aimed_amount * 100)

        row['OverSuccessJ-1'] = mos
        mos = (p_amount * 1.15) > p.aimed_amount

        project_list.append(row)
        current_date = current_date + pd.to_timedelta(1, unit='d')

df = pd.DataFrame(project_list)

In [46]:
# Boolean indicator columns of the micro panel, stored as integers for export.
indicator_columns = [
    'DonJ',
    'DonJ-1',
    'InfoJ',
    'InfoJ-1',
    'PersJ',
    'PersJ-1',
    'CommentJ-1',
    'Jump1J',
    'JumpAJ',
    'OverSuccessJ-1',
]
df = df.astype(dict.fromkeys(indicator_columns, 'int'))

In [50]:
# Write the micro panel as a semicolon-separated file (index included).
df.to_csv(path_or_buf='data/output/micro.csv', sep=';')