In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read sacrifices.csv into a DataFrame; the bare name on the last line
# makes the notebook render the frame as the cell's output.
data = pd.read_csv(filepath_or_buffer='sacrifices.csv')
data

Unnamed: 0,Item name,Glow color,Thaumometer reading,Mana gained from sacrifice
0,Warhammer of Rage +1,yellow,40,18
1,Amulet of Melancholy,green,40,20
2,Amulet of Abstraction,blue,71,48
3,Ring of Hope +6,yellow,50,19
4,Warhammer of Flame,blue,4,5
...,...,...,...,...
831,Longsword of Wrath +3,blue,33,32
832,Amulet of Rage +2,green,28,20
833,Pendant of Power +5,red,38,24
834,Amulet of Wounding,blue,71,48


Do all of the items follow the "Item of Thing" naming scheme, optionally followed by a " +X" modifier?

In [3]:
# Verify that every item name is exactly "<Item> of <Thing>" with an optional
# single-digit " +N" suffix.
# - Raw string: \w, \+ and \d must reach the regex engine untouched; in a plain
#   string literal they are invalid escape sequences.
# - Series.str.match only anchors at the start of the string, so the explicit
#   "$" is needed to reject names with unexpected trailing text (without it the
#   optional suffix group means any name starting with "X of Y" would pass).
data["Item name"].str.match(r"^\w+ of \w+(?: \+\d)?$").all()

True

They do. Let's break the name down, then.

In [4]:
# Split each item name into its parts. The four capture groups become the
# integer-labelled columns of the result:
#   0 -> item word, 1 -> enchantment word,
#   2 -> the whole optional suffix including its leading space (" +N"),
#   3 -> just the digit N.
# Names without a suffix get NaN in columns 2 and 3.
# The pattern is a raw string so that \w, \+ and \d are passed to the regex
# engine as written instead of being parsed as (invalid) string escapes.
name_fragments = data["Item name"].str.extract(r"^(\w+) of (\w+)( \+(\d))?$")
name_fragments

Unnamed: 0,0,1,2,3
0,Warhammer,Rage,+1,1
1,Amulet,Melancholy,,
2,Amulet,Abstraction,,
3,Ring,Hope,+6,6
4,Warhammer,Flame,,
...,...,...,...,...
831,Longsword,Wrath,+3,3
832,Amulet,Rage,+2,2
833,Pendant,Power,+5,5
834,Amulet,Wounding,,


In [5]:
# Put the extracted name parts side by side with the original columns
# (fragments first), aligning rows on the shared index.
data = pd.concat(objs=[name_fragments, data], axis="columns")
data

Unnamed: 0,0,1,2,3,Item name,Glow color,Thaumometer reading,Mana gained from sacrifice
0,Warhammer,Rage,+1,1,Warhammer of Rage +1,yellow,40,18
1,Amulet,Melancholy,,,Amulet of Melancholy,green,40,20
2,Amulet,Abstraction,,,Amulet of Abstraction,blue,71,48
3,Ring,Hope,+6,6,Ring of Hope +6,yellow,50,19
4,Warhammer,Flame,,,Warhammer of Flame,blue,4,5
...,...,...,...,...,...,...,...,...
831,Longsword,Wrath,+3,3,Longsword of Wrath +3,blue,33,32
832,Amulet,Rage,+2,2,Amulet of Rage +2,green,28,20
833,Pendant,Power,+5,5,Pendant of Power +5,red,38,24
834,Amulet,Wounding,,,Amulet of Wounding,blue,71,48


In [6]:
# Column 2 holds the raw " +N" suffix, which duplicates the digit in column 3,
# and the full item name has already been split up, so both are removed.
redundant_columns = [2, "Item name"]

# Short, code-friendly names for the remaining columns.
short_names = {
    0: "item",
    1: "enchant",
    3: "mod",
    "Glow color": "color",
    "Thaumometer reading": "thmm",
    "Mana gained from sacrifice": "mana",
}

data = data.drop(columns=redundant_columns)
data = data.rename(columns=short_names)
data

Unnamed: 0,item,enchant,mod,color,thmm,mana
0,Warhammer,Rage,1,yellow,40,18
1,Amulet,Melancholy,,green,40,20
2,Amulet,Abstraction,,blue,71,48
3,Ring,Hope,6,yellow,50,19
4,Warhammer,Flame,,blue,4,5
...,...,...,...,...,...,...
831,Longsword,Wrath,3,blue,33,32
832,Amulet,Rage,2,green,28,20
833,Pendant,Power,5,red,38,24
834,Amulet,Wounding,,blue,71,48


"mod" has a bunch of NaNs. It would be nice if we could call those zeroes. Are there any zeroes in the data already?

In [7]:
# Tally every distinct modifier value; dropna=False keeps the missing
# entries in the count so they show up as their own row.
data.loc[:, "mod"].value_counts(dropna=False)

NaN    390
1      130
2      111
3       70
4       68
5       34
6       31
7        1
8        1
Name: mod, dtype: int64

There are no zeroes in the mod column, so we can do the following safely:

In [8]:
# Items without a "+N" suffix have no modifier; record that as 0.
# The extracted digits are text, so convert them to numbers first (missing
# entries stay NaN), then fill the gaps and store the column as integers.
# This replaces an identity test against np.NaN, which only works while every
# missing value is the very same NaN object and which breaks on NumPy >= 2.0,
# where the np.NaN alias no longer exists.
data["mod"] = pd.to_numeric(data["mod"]).fillna(0).astype(int)
data

Unnamed: 0,item,enchant,mod,color,thmm,mana
0,Warhammer,Rage,1,yellow,40,18
1,Amulet,Melancholy,0,green,40,20
2,Amulet,Abstraction,0,blue,71,48
3,Ring,Hope,6,yellow,50,19
4,Warhammer,Flame,0,blue,4,5
...,...,...,...,...,...,...
831,Longsword,Wrath,3,blue,33,32
832,Amulet,Rage,2,green,28,20
833,Pendant,Power,5,red,38,24
834,Amulet,Wounding,0,blue,71,48


In [9]:
# Write the cleaned table to disk; index=False leaves the row index out of the file.
data.to_csv(path_or_buf="sacrifices-clean.csv", index=False)

Now let's repeat the trick for the store items.

In [10]:
# Read store.csv into a DataFrame (this rebinds `data`) and display it.
data = pd.read_csv(filepath_or_buffer="store.csv")
data

Unnamed: 0,Item name,Glow color,Thaumometer reading,Price
0,Longsword of Wounding +2,Red,14,66gp
1,Warhammer of Justice +1,Yellow,5,41gp
2,Hammer of Capability,Blue,35,35gp
3,Pendant of Truth,Red,40,38gp
4,Ring of Joy +5,Blue,29,32gp
5,Warhammer of Flame +2,Yellow,48,65gp
6,Battleaxe of Glory,Blue,7,23gp
7,Plough of Plenty,Yellow,12,35gp
8,Saw of Capability +1,Green,16,35gp
9,Amulet of Wounding +2,Green,50,35gp


In [11]:
# Split "<Item> of <Thing> +N" into item / enchantment / optional digit.
# Capture groups become columns 0-3: item, enchantment, the raw " +N" suffix,
# and the digit alone. The pattern is a raw string so \w, \+ and \d are passed
# to the regex engine as written rather than parsed as string escapes.
name_fragments = data["Item name"].str.extract(r"^(\w+) of (\w+)( \+(\d))?$")
data = pd.concat([name_fragments, data], axis=1)

# Column 2 (raw suffix) duplicates column 3 and the full name is now redundant.
# The store listing has a "Price" column and no mana column, so only the
# columns that are actually present are renamed.
data = data.drop(columns=[2, "Item name"]).rename(columns={
    0: "item",
    1: "enchant",
    3: "mod",
    "Glow color": "color",
    "Thaumometer reading": "thmm",
    "Price": "price"
})

# Items without a "+N" suffix have no modifier; record that as 0 and keep the
# column as integers. (Avoids an identity test against np.NaN, which is fragile
# and unavailable on NumPy >= 2.0.)
data["mod"] = pd.to_numeric(data["mod"]).fillna(0).astype(int)

# extra step: drop the "gp" at the end of each price and store it as a number.
# Only a literal trailing "gp" is removed, so an unexpected price format makes
# the integer conversion fail loudly instead of silently losing two characters.
data["price"] = data["price"].str.replace(r"gp$", "", regex=True).astype(int)

# extra step: lowercase color to match the other list
data["color"] = data["color"].str.lower()
data

Unnamed: 0,item,enchant,mod,color,thmm,price
0,Longsword,Wounding,2,red,14,66
1,Warhammer,Justice,1,yellow,5,41
2,Hammer,Capability,0,blue,35,35
3,Pendant,Truth,0,red,40,38
4,Ring,Joy,5,blue,29,32
5,Warhammer,Flame,2,yellow,48,65
6,Battleaxe,Glory,0,blue,7,23
7,Plough,Plenty,0,yellow,12,35
8,Saw,Capability,1,green,16,35
9,Amulet,Wounding,2,green,50,35


In [12]:
# Write the cleaned store listing to disk; index=False leaves the row index out of the file.
data.to_csv(path_or_buf="store-clean.csv", index=False)