In [1]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from IPython.display import Markdown, display

def pretty_print(df):
    fmt = ['---' for i in range(len(df.columns))]
    df_fmt = pd.DataFrame([fmt], columns=df.columns)
    df_formatted = pd.concat([df_fmt, df])
    display(Markdown(df_formatted.to_csv(sep="|", index=False)))

%matplotlib inline

In [2]:
def get_clean(isVerbose=False):
    df = pd.read_csv("dirtyks.csv")
    
    if isVerbose:
        print(df.head(3))
    
    df["country"] = df["country"].replace('N,0"', np.nan)
    # We don't want no weird country
    df["goal"] = df["goal"].replace(-1, np.nan)
    # How do you even have a goal that's negative????
    df["currency"] = df["currency"].replace("bitcoin", np.nan)
    # just. no.
    df["state"] = df["state"].replace(["Kyle", "undefined"], np.nan)
    # I'm not even a state! and what does it mean to be undefined???
    #df["state"] = df["state"].replace("undefined", np.nan) # see above
        
    clean = df[["name", "main_category", "currency", "goal", "pledged", "state", "backers", "country", "usd pledged", "usd_pledged_real"]]
    
    clean = clean.dropna()
        
    return clean
    # We just want to make sure we can use it elsewhere

get_clean().head(3)

Unnamed: 0,name,main_category,currency,goal,pledged,state,backers,country,usd pledged,usd_pledged_real
1,Greeting From Earth: ZGAC Arts Capsule For ET,Film & Video,USD,30000.0,2421.0,failed,15.0,US,100.0,2421.0
3,ToshiCapital Rekordz Needs Help to Complete Album,Music,USD,5000.0,1.0,failed,1.0,US,1.0,1.0
4,Community Film Project: The Art of Neighborhoo...,Film & Video,USD,19500.0,1283.0,canceled,14.0,US,1283.0,1283.0


In [3]:
def examine(isVerbose=False):
    df = get_clean()
    
    #largest asking amount
    #largest difference between goal and pledged
    #which countries earn the most kickstarter money
    
    max_goal = df["goal"].max()
    
    max_goal_entry = df.loc[df["goal"] == max_goal]
    
    max_pledged = df["usd_pledged_real"].max()
    
    max_pledged_entry = df.loc[df["usd_pledged_real"] == max_pledged]
    # Note max pledged in USD
    
    df["profit"] = df["pledged"] - df["goal"]
    
    max_profits = df["profit"].nlargest(5)
    # Largest profit in projects currency
    
    if isVerbose:
        print("=========== MAXES ===========")
        pretty_print(max_goal_entry)
        pretty_print(max_pledged_entry)
        print()
        print("======== MAX PROFITS ========")
        for number in max_profits:
            pretty_print(df.loc[df["profit"] == number, ["name", "main_category", "country", "state"]])

    countries = set(df["country"])
    
    US_amnt = 0
    other_amnt = 0
    
    if isVerbose:
        print()
        print("====== COUNTRY PROFITS ======")
    
    for country_name in countries:
        amnt = df.loc[df["country"] == country_name, "usd_pledged_real"].sum()
        if country_name == "US":
            US_amnt = amnt
        else:
            other_amnt += amnt
        if isVerbose:
            print("{} made {} overall.".format(country_name, amnt))
    
    if isVerbose:
        print()
        print("========COUNTRY COMPR========")
        print("The US made {}".format(US_amnt))
        print("The other countries made {}".format(other_amnt))
    
    # Ratio of total profit to countries worth??
    # Expanding on current data is good! Use multiple datasets together to get something interesting like relations!
    
examine(True)



name|main_category|currency|goal|pledged|state|backers|country|usd pledged|usd_pledged_real
---|---|---|---|---|---|---|---|---|---
Electric Car|Technology|MXN|40000000.0|510.0|failed|2.0|MX|0.0|28.86574598143536


name|main_category|currency|goal|pledged|state|backers|country|usd pledged|usd_pledged_real
---|---|---|---|---|---|---|---|---|---
Redefining Italian Luxury Watches - Filippo Loreti|Design|EUR|20000.0|4809548.0|successful|18550.0|NL|239150.72109454|5020667.049428469





name|main_category|country|state
---|---|---|---
Redefining Italian Luxury Watches - Filippo Loreti|Design|NL|successful


name|main_category|country|state
---|---|---|---
Pimax: The World's First 8K VR Headset|Technology|US|successful


name|main_category|country|state
---|---|---|---
3Doodler: The World's First 3D Printing Pen|Technology|US|successful


name|main_category|country|state
---|---|---|---
"Polygons | The Flat 4-in-1 Measuring Spoon"|Design|US|successful


name|main_category|country|state
---|---|---|---
"Air Bonsai | Create your ""little star"""|Design|US|successful



CA made 799964.9410524822 overall.
DE made 628978.0733471175 overall.
BE made 36169.64389919874 overall.
SE made 48173.30124908796 overall.
MX made 15542.153732584007 overall.
CH made 89652.75072388261 overall.
AU made 394290.0627140264 overall.
AT made 1198.8891633657431 overall.
IT made 77988.7990548545 overall.
US made 35854641.499999985 overall.
GB made 2586209.3129510107 overall.
DK made 20366.348309474175 overall.
NO made 30987.80003004175 overall.
FR made 140574.58029193472 overall.
ES made 139756.29156425706 overall.
NZ made 148839.72202166374 overall.
SG made 17847.439135524684 overall.
IE made 143652.4037052649 overall.
HK made 322155.2789659084 overall.
LU made 8612.5465355337 overall.
NL made 5153428.678031165 overall.

The US made 35854641.499999985
The other countries made 10804389.016478376


Note that the other countries net gain is not exactly correct because it's not in USD

# Tictactoe


## One Game
We want to get a board
### Players turn
* find empty spots
* pick a spot randomly
* check to see if player won

### End Scenario
* p1 wins
* p2 wins
* scratch

## Simulation

* get a number of times to simulate
* store wins for each player

In [4]:
def tictactoe(iterations=1000):
    def player_move(player, board, open_spots):
        move = np.random.choice(open_spots)
        open_spots.remove(move)
        board[move] = player
        return board, open_spots
        
    def win_check(board):
        win_scenarios = ((0,1,2), (3,4,5), (6,7,8), (0,3,6), (1,4,7), (2,5,8), (0,4,8), (2,4,6))
        for scenario in win_scenarios:
            if board[scenario[0]] == board[scenario[1]] == board[scenario[2]] != 0:
                return True
        return False
    
    def one_game():
        #[0 1 2
        # 3 4 5
        # 6 7 8]
        board = [0, 0, 0, 0, 0, 0, 0, 0, 0]
        open_spots = [0, 1, 2, 3, 4, 5, 6, 7, 8]
        
        players = [1, 2]
        
        player = 0
        
        while len(open_spots) > 0:
            board, open_spots = player_move(players[player], board, open_spots)
            
            if(win_check(board)):
                return players[player]
            player = not player

        return 0
    
    # 0 for scratch, 1 for p1, 2 for p2
    
    def simulate(it=iterations):
        p1_wins = 0
        p2_wins = 0
        for i in range(it):
            result = one_game()
            if result == 1:
                p1_wins += 1
            elif result == 2:
                p2_wins += 1
        
        print("p1 won {} out of {}".format(p1_wins, it))
        print("p2 won {} out of {}".format(p2_wins, it))
        print("The game ended in a draw {} out of {}".format(it-p1_wins-p2_wins, it))
        print()
        print("p1   won {:.3f} percent of the time".format(p1_wins/it*100))
        print("p2   won {:.3f} percent of the time".format(p2_wins/it*100))
        print("draw won {:.3f} percent of the time".format((it-p1_wins-p2_wins)/it*100))
        
    simulate()
    
tictactoe(12345)

p1 won 7139 out of 12345
p2 won 3620 out of 12345
The game ended in a draw 1586 out of 12345

p1   won 57.829 percent of the time
p2   won 29.324 percent of the time
draw won 12.847 percent of the time
