In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
# Read the cleaned sacrifice records into a DataFrame; the bare name on the
# last line makes the notebook render the frame as this cell's output.
data = pd.read_csv(filepath_or_buffer='sacrifices-clean.csv')
data

Unnamed: 0,item,enchant,mod,color,thmm,mana
0,Warhammer,Rage,1,yellow,40,18
1,Amulet,Melancholy,0,green,40,20
2,Amulet,Abstraction,0,blue,71,48
3,Ring,Hope,6,yellow,50,19
4,Warhammer,Flame,0,blue,4,5
...,...,...,...,...,...,...
831,Longsword,Wrath,3,blue,33,32
832,Amulet,Rage,2,green,28,20
833,Pendant,Power,5,red,38,24
834,Amulet,Wounding,0,blue,71,48


In [2]:
def predictor(item, mod, color, thmm):
    """Return a hand-tuned, rule-based estimate from an item's attributes.

    The rules are keyed on ``color`` first:

    * ``"yellow"``: always 19.5.
    * ``"blue"``: ``thmm - 22`` for an Amulet, Pendant or Ring, otherwise
      ``thmm`` unchanged.
    * ``"red"``: 20.5 when ``mod < 2``, otherwise 32.3.
    * any other color (handled as green): 17.4 when ``mod < 2``,
      otherwise 26.8.

    Args:
        item: Item name; only consulted for blue items.
        mod: Modifier level; only consulted for red and green items.
        color: Color name selecting which rule applies.
        thmm: Numeric reading; only consulted for blue items.

    Returns:
        The estimate: a float constant, or a value derived from ``thmm``
        for blue items.
    """
    if color == "yellow":
        # One flat value, independent of every other input.
        return 19.5

    if color == "blue":
        # Blue follows thmm directly, shifted down by 22 for these three items.
        shifted = item in ("Amulet", "Pendant", "Ring")
        return thmm - 22 if shifted else thmm

    # Red and the fallback (green) case are both two-level steps on mod;
    # only the pair of constants differs.
    below_two, two_or_more = (20.5, 32.3) if color == "red" else (17.4, 26.8)
    return below_two if mod < 2 else two_or_more

In [3]:
# Score every row with predictor(). The column subset is listed in the same
# order as predictor's parameters, so each row can be unpacked straight into
# the call.
data["pred"] = data[["item", "mod", "color", "thmm"]].apply(
    lambda row: predictor(*row),
    axis=1,
)
data

Unnamed: 0,item,enchant,mod,color,thmm,mana,pred
0,Warhammer,Rage,1,yellow,40,18,19.5
1,Amulet,Melancholy,0,green,40,20,17.4
2,Amulet,Abstraction,0,blue,71,48,49.0
3,Ring,Hope,6,yellow,50,19,19.5
4,Warhammer,Flame,0,blue,4,5,4.0
...,...,...,...,...,...,...,...
831,Longsword,Wrath,3,blue,33,32,33.0
832,Amulet,Rage,2,green,28,20,26.8
833,Pendant,Power,5,red,38,24,32.3
834,Amulet,Wounding,0,blue,71,48,49.0


In [4]:
# Coefficient of determination of the rule-based predictions against the
# recorded mana values.
r2_score(y_true=data["mana"], y_pred=data["pred"])

0.4709907931749393

In [5]:
# Read the store listing into a DataFrame; the bare name on the last line
# makes the notebook render the frame as this cell's output.
store = pd.read_csv(filepath_or_buffer="store-clean.csv")
store

Unnamed: 0,item,enchant,mod,color,thmm,price
0,Longsword,Wounding,2,red,14,66
1,Warhammer,Justice,1,yellow,5,41
2,Hammer,Capability,0,blue,35,35
3,Pendant,Truth,0,red,40,38
4,Ring,Joy,5,blue,29,32
5,Warhammer,Flame,2,yellow,48,65
6,Battleaxe,Glory,0,blue,7,23
7,Plough,Plenty,0,yellow,12,35
8,Saw,Capability,1,green,16,35
9,Amulet,Wounding,2,green,50,35


In [6]:
# Score every store row with predictor(). The column subset is listed in the
# same order as predictor's parameters, so each row can be unpacked straight
# into the call.
store["pred"] = store[["item", "mod", "color", "thmm"]].apply(
    lambda row: predictor(*row),
    axis=1,
)
store

Unnamed: 0,item,enchant,mod,color,thmm,price,pred
0,Longsword,Wounding,2,red,14,66,32.3
1,Warhammer,Justice,1,yellow,5,41,19.5
2,Hammer,Capability,0,blue,35,35,35.0
3,Pendant,Truth,0,red,40,38,20.5
4,Ring,Joy,5,blue,29,32,7.0
5,Warhammer,Flame,2,yellow,48,65,19.5
6,Battleaxe,Glory,0,blue,7,23,7.0
7,Plough,Plenty,0,yellow,12,35,19.5
8,Saw,Capability,1,green,16,35,17.4
9,Amulet,Wounding,2,green,50,35,26.8


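Monte Carlo estimate of P(combined mana of 2 hi-mod green items >= 30), where "hi-mod" means mod >= 2.
Using 998 samples makes for nice round numbers after applying Laplace's Law of Succession: the estimate (successes + 1) / (998 + 2) has a denominator of exactly 1000.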

In [7]:
# Monte Carlo estimate of how often the mana of two green items with
# mod >= 2, drawn with replacement, sums to at least 30.
pop = data.query('color == "green" and mod >= 2')["mana"]

# One seeded RandomState is shared by every draw, so the counts are the same
# on each Restart & Run All. Successive draws still differ because the
# generator's state advances; passing an int seed to sample() instead would
# repeat the identical pair 998 times.
rng = np.random.RandomState(0)
samp = pd.Series(
    [pop.sample(2, replace=True, random_state=rng).sum() for _ in range(998)]
)

# Vectorized comparison; value_counts() tallies the True/False outcomes.
(samp >= 30).value_counts()

True     965
False     33
dtype: int64