In [1]:
# 1.
import pandas as pd
import numpy as np
# Load the bridge records, using the 'bridgeid' column as the row index.
df = pd.read_csv('griselbridge.csv', index_col='bridgeid')
# Bridges with no recorded material get the explicit label 'unknown'
# instead of NaN, so they form their own group in later groupby calls.
df['material'] = df['material'].fillna('unknown')
print(df)

         material  age  length  mosscover covered cursed waterbody  guardian
bridgeid                                                                    
1           stone  2.5     4.5        0.8       n      y     river     troll
2           stone  3.8     3.1        0.5       n      y     marsh       NaN
3         unknown  2.1     4.4        0.6       n      n     marsh     troll
4            wood  1.2     2.2        0.6       n      n     creek  squirrel
5           brick  2.6     3.8        0.9       n      n     river     troll
...           ...  ...     ...        ...     ...    ...       ...       ...
929         brick  1.6     1.9        0.7       n      y     marsh  squirrel
930         stone  3.7     4.2        0.5       n      y     marsh  squirrel
931         brick  2.0     2.7        0.4       n      n     river      none
932          wood  1.7     2.5        0.9       n      n     river  squirrel
933         stone  4.4     3.9        0.6       n      y     marsh      ogre

In [3]:
# 2.
# Mean and median bridge length for each material category.
print(df[['material', 'length']].groupby('material').agg(['mean', 'median']))
# The figures quoted here are transcribed from the table printed above.
# groupby sorts its keys alphabetically (brick, stone, unknown, wood), so the
# 4.080 / 4.2 row belongs to 'unknown' and the 2.949 / 2.9 row to 'wood'.
print("The mean and median lengths for brick bridges are 2.906 and 2.8. For stone, the mean and median values are 3.041 and 3.1. For wood, 2.949 and 2.9, and for unknown materials they are 4.080 and 4.2.")
# Mean age and mean moss cover, split by the 'cursed' flag.
print(df[['cursed', 'age', 'mosscover']].groupby('cursed').agg('mean'))
print("Cursed bridges tend to be slightly older and have a higher level of moss-cover.")

            length       
              mean median
material                 
brick     2.906173    2.8
stone     3.041315    3.1
unknown   4.080000    4.2
wood      2.948851    2.9
The mean and median lengths for brick bridges are 2.906 and 2.8. For stone, the mean and median values are 3.041 and 3.1. For wood, 2.949 and 2.9, and for unknown materials they are 4.080 and 4.2.
             age  mosscover
cursed                     
n       2.117427   0.488581
y       2.434921   0.728617
Cursed bridges tend to be slightly older, but not by much. Cursed bridges also have a slightly higher level of moss-cover.


In [5]:
# 3.
# Derived columns: mossqty is mosscover multiplied by length, and mossrate
# is that quantity divided by the bridge's age.
df["mossqty"] = df["mosscover"] * df["length"]
df["mossrate"] = df["mossqty"] / df["age"]

# Show the inputs next to the two derived columns.
moss_columns = ["mosscover", "length", "age", "mossqty", "mossrate"]
print(df[moss_columns])

# Average moss growth rate per material.
print(df.groupby("material")[["mossrate"]].mean())
print("On average, moss grows at a rate of 0.857 linear units per epoch on brick bridges, 0.891 on stone bridges, 1.403 on wood bridges, and 0.984 on bridges of unknown material.")

# Average moss growth rate for covered versus uncovered bridges.
print(df.groupby("covered")[["mossrate"]].mean())
print("Covered bridges encourage slower moss growth.")

          mosscover  length  age  mossqty  mossrate
bridgeid                                           
1               0.8     4.5  2.5     3.60  1.440000
2               0.5     3.1  3.8     1.55  0.407895
3               0.6     4.4  2.1     2.64  1.257143
4               0.6     2.2  1.2     1.32  1.100000
5               0.9     3.8  2.6     3.42  1.315385
...             ...     ...  ...      ...       ...
929             0.7     1.9  1.6     1.33  0.831250
930             0.5     4.2  3.7     2.10  0.567568
931             0.4     2.7  2.0     1.08  0.540000
932             0.9     2.5  1.7     2.25  1.323529
933             0.6     3.9  4.4     2.34  0.531818

[933 rows x 5 columns]
          mossrate
material          
brick     0.857133
stone     0.890560
unknown   0.984254
wood      1.403203
On average, moss grows at a rate of 0.857 linear units per epoch on brick bridges, 0.891 on stone bridges, 1.403 on wood bridges, and 0.984 on bridges of unknown material.
         mossr

In [7]:
# 4.
# Remove, in place, the records that have no guardian value at all.
df.dropna(subset=["guardian"], inplace=True)

# 'flow' is 'y' when the waterbody is a river or a creek, otherwise 'n'.
over_flowing_water = df["waterbody"].isin(["river", "creek"])
df["flow"] = np.where(over_flowing_water, 'y', 'n')

# Threat level assigned to each named guardian; any other guardian value
# falls through to the trailing catch-all condition and gets 0.0.
threat_by_guardian = {"ogre": 0.1, "troll": 0.2, "squirrel": 0.01}
conditions = [df.guardian == name for name in threat_by_guardian] + [True]
values = list(threat_by_guardian.values()) + [0.0]
df["threatlvl"] = np.select(conditions, values)

print(df[["waterbody", "flow", "guardian", "threatlvl"]])

         waterbody flow  guardian  threatlvl
bridgeid                                    
1            river    y     troll       0.20
3            marsh    n     troll       0.20
4            creek    y  squirrel       0.01
5            river    y     troll       0.20
6            river    y  squirrel       0.01
...            ...  ...       ...        ...
929          marsh    n  squirrel       0.01
930          marsh    n  squirrel       0.01
931          river    y      none       0.00
932          river    y  squirrel       0.01
933          marsh    n      ogre       0.10

[921 rows x 4 columns]


In [11]:
# 5.
# Mean threat level for flowing versus non-flowing water.
print(df.groupby("flow")[["threatlvl"]].mean())
print("On average, bridges over flowing water are less dangerous than bridges over non-flowing water.")

# Mean threat level for each material.
print(df.groupby("material")[["threatlvl"]].mean())
print("On average, stone bridges have the highest threat level.")

# Mean threat level for each material/flow pair, leaving out the bridges
# whose material is 'unknown'.
known_material = df.loc[df["material"] != "unknown", ["material", "flow", "threatlvl"]]
print(known_material.groupby(["material", "flow"]).mean())
print("Brick bridges over non-flowing water yield the highest threat level.")


      threatlvl
flow           
n      0.118910
y      0.100626
On average, bridges over flowing water are less dangerous than bridges over non-flowing water.
          threatlvl
material           
brick      0.099969
stone      0.112799
unknown    0.104000
wood       0.100457
On average, stone bridges have the highest threat level.
               threatlvl
material flow           
brick    n      0.126667
         y      0.089657
stone    n      0.114286
         y      0.112207
wood     n      0.114821
         y      0.093697
Brick bridges over non-flowing water yield the highest threat level.


In [13]:
# 6.
# Keep bridges that meet all three criteria: age above 1.0, built of stone
# or brick, and length of at least 4.2.
older_than_one = df["age"] > 1.0
stone_or_brick = df["material"].isin(["stone", "brick"])
long_enough = df["length"] >= 4.2
dfsub = df[older_than_one & stone_or_brick & long_enough]
print(dfsub)
print(f"{dfsub.shape[0]} bridges satisfy the conditions to be included in this subset.")

         material  age  length  mosscover covered cursed waterbody  guardian  \
bridgeid                                                                       
1           stone  2.5     4.5        0.8       n      y     river     troll   
8           brick  3.1     4.3        0.8       n      n     marsh      ogre   
17          stone  2.9     4.3        1.0       n      n     river     troll   
21          stone  3.3     4.3        0.9       n      n     marsh  squirrel   
25          stone  1.5     4.2        0.8       n      n     river     troll   
...           ...  ...     ...        ...     ...    ...       ...       ...   
906         brick  2.9     4.2        0.8       n      y     marsh     troll   
911         stone  3.6     4.3        0.4       n      n     river      ogre   
912         stone  3.8     4.2        0.4       n      n     river      ogre   
914         brick  1.5     4.3        0.6       n      n     river     troll   
930         stone  3.7     4.2        0.

In [71]:
# 7.
# Drop every record that still has a missing value in any column.
# The explicit .copy() makes dfa an independent DataFrame, so adding a column
# to dfa afterwards does not raise pandas' SettingWithCopyWarning.
dfa = df.dropna().copy()
print(dfa)
print(f"There are {len(dfa)} records now that those with null values have been removed.")

         material  age  length  mosscover covered cursed waterbody  guardian  \
bridgeid                                                                       
1           stone  2.5     4.5        0.8       n      y     river     troll   
3         unknown  2.1     4.4        0.6       n      n     marsh     troll   
4            wood  1.2     2.2        0.6       n      n     creek  squirrel   
5           brick  2.6     3.8        0.9       n      n     river     troll   
6           stone  2.4     2.4        0.8       y      n     river  squirrel   
...           ...  ...     ...        ...     ...    ...       ...       ...   
929         brick  1.6     1.9        0.7       n      y     marsh  squirrel   
930         stone  3.7     4.2        0.5       n      y     marsh  squirrel   
931         brick  2.0     2.7        0.4       n      n     river      none   
932          wood  1.7     2.5        0.9       n      n     river  squirrel   
933         stone  4.4     3.9        0.

In [17]:
# 8.
# Mean threat level for each material/waterbody pair, leaving out the
# bridges whose material is 'unknown'.
known_material = dfa.loc[dfa["material"] != "unknown", ["material", "waterbody", "threatlvl"]]
print(known_material.groupby(["material", "waterbody"]).mean())
# NOTE(review): which material/waterbody pair "Bridge A" refers to is not
# stated in this cell -- confirm the quoted figure against the question text.
print("I would prefer to cross Bridge A because I would have the lowest probability of getting attacked at 0.100471")


                    threatlvl
material waterbody           
brick    creek       0.109344
         marsh       0.125843
         river       0.082899
stone    creek       0.085949
         marsh       0.116207
         river       0.121429
wood     creek       0.078125
         marsh       0.115091
         river       0.100471
I would prefer to cross a bridge made of wood that crosses a river because I would have the lowest probability of getting attacked at 0.100471


In [89]:
# 9.
# Standard deviation and mean of moss cover; they set the category cut-offs.
stdDeviation = dfa["mosscover"].std()
mean = dfa["mosscover"].mean()
print(stdDeviation, mean)

# np.select checks the conditions in order and uses the first one that holds:
#   mosscover < mean - std  -> "low"
#   mosscover < mean + std  -> "average"
# Everything else takes the default, "high".
conditions = [
    dfa.mosscover < (mean - stdDeviation),
    dfa.mosscover < (mean + stdDeviation),
]
values = ["low", "average"]
# "high" is passed as an explicit string default. Leaving np.select's default
# at the integer 0 alongside string choices raises TypeError on current NumPy
# because the two have no common dtype.
# assign() returns a new DataFrame, so the column is added without writing
# into a frame pandas may regard as a slice of df (SettingWithCopyWarning).
dfa = dfa.assign(mosscat=np.select(conditions, values, default="high"))

# Restrict to the five material / moss-category / waterbody combinations
# being compared.
dfsub = dfa[
    ((dfa.material == "stone") & (dfa.mosscat == "average") & (dfa.waterbody == "marsh")) |
    ((dfa.material == "brick") & (dfa.mosscat == "average") & (dfa.waterbody == "river")) |
    ((dfa.material == "brick") & (dfa.mosscat == "average") & (dfa.waterbody == "creek")) |
    ((dfa.material == "stone") & (dfa.mosscat == "low") & (dfa.waterbody == "creek")) |
    ((dfa.material == "wood") & (dfa.mosscat == "high") & (dfa.waterbody == "river"))
]
# Mean threat level for each of those combinations.
print(dfsub[["material", "waterbody", "mosscat", "threatlvl"]].groupby(["material", "mosscat", "waterbody"]).agg("mean"))
print("I would prefer to cross bridge G because I have the lowest probability of being attacked at 0.06.")
                                                                                   

0.2694010316776287 0.5713340683572217
                            threatlvl
material mosscat waterbody           
brick    average creek       0.123659
                 river       0.086765
stone    average marsh       0.122703
         low     creek       0.060000
wood     high    river       0.128125
I would prefer to cross bridge G because I have the lowest probability of being attacked at 0.06.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfa["mosscat"] = np.select(conditions, values)
