In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3

In [2]:
# Lift the column display limit so wide frames are rendered without elided columns.
pd.set_option('display.max_columns', None)

# Load the raw table from ./Resources/monsters.csv and preview its first rows.
df = pd.read_csv('./Resources/monsters.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,_id,index,name,size,type,subtype,alignment,armor_class,hit_points,hit_dice,speed,strength,dexterity,constitution,intelligence,wisdom,charisma,proficiencies,damage_vulnerabilities,damage_resistances,damage_immunities,condition_immunities,senses,languages,challenge_rating,special_abilities,actions,legendary_actions,url,reactions,other_speeds
0,0,5f2329ad0b1bb138c59411ee,aboleth,Aboleth,Large,aberration,,lawful evil,17,135,18d10,"{'walk': '10 ft.', 'swim': '40 ft.'}",21,9,15,18,15,18,"[{'name': 'Saving Throw: CON', 'url': '/api/pr...",[],[],[],[],"{'darkvision': '120 ft.', 'passive_perception'...","Deep Speech, telepathy 120 ft.",10.0,"[{'name': 'Amphibious', 'desc': 'The aboleth c...","[{'name': 'Multiattack', 'desc': 'The aboleth ...","[{'name': 'Detect', 'desc': 'The aboleth makes...",/api/monsters/aboleth,,
1,1,5f2329ad0b1bb138c59411ef,acolyte,Acolyte,Medium,humanoid,any race,any alignment,10,9,2d8,{'walk': '30 ft.'},10,10,10,10,14,11,"[{'name': 'Skill: Medicine', 'url': '/api/prof...",[],[],[],[],{'passive_perception': 12},any one language (usually Common),0.25,"[{'name': 'Spellcasting', 'desc': 'The acolyte...","[{'name': 'Club', 'desc': 'Melee Weapon Attack...",,/api/monsters/acolyte,,
2,2,5f2329ad0b1bb138c59411f1,adult-black-dragon,Adult Black Dragon,Huge,dragon,,chaotic evil,19,195,17d12,"{'walk': '40 ft.', 'fly': '80 ft.', 'swim': '4...",23,14,21,14,13,17,"[{'name': 'Saving Throw: DEX', 'url': '/api/pr...",[],[],['acid'],[],"{'blindsight': '60 ft.', 'darkvision': '120 ft...","Common, Draconic",14.0,"[{'name': 'Amphibious', 'desc': 'The dragon ca...","[{'name': 'Multiattack', 'desc': 'The dragon c...","[{'name': 'Detect', 'desc': 'The dragon makes ...",/api/monsters/adult-black-dragon,,
3,3,5f2329ad0b1bb138c59411f0,adult-blue-dragon,Adult Blue Dragon,Huge,dragon,,lawful evil,19,225,18d12,"{'walk': '40 ft.', 'burrow': '30 ft.', 'fly': ...",25,10,23,16,15,19,"[{'name': 'Saving Throw: DEX', 'url': '/api/pr...",[],[],['lightning'],[],"{'blindsight': '60 ft.', 'darkvision': '120 ft...","Common, Draconic",16.0,"[{'name': 'Legendary Resistance', 'desc': 'If ...","[{'name': 'Multiattack', 'desc': 'The dragon c...","[{'name': 'Detect', 'desc': 'The dragon makes ...",/api/monsters/adult-blue-dragon,,
4,4,5f2329ad0b1bb138c59411f2,adult-brass-dragon,Adult Brass Dragon,Huge,dragon,,chaotic good,18,172,15d12,"{'walk': '40 ft.', 'burrow': '40 ft.', 'fly': ...",23,10,21,14,13,17,"[{'name': 'Saving Throw: DEX', 'url': '/api/pr...",[],[],['fire'],[],"{'blindsight': '60 ft.', 'darkvision': '120 ft...","Common, Draconic",13.0,"[{'name': 'Legendary Resistance', 'desc': 'If ...","[{'name': 'Multiattack', 'desc': 'The dragon c...",,/api/monsters/adult-brass-dragon,,


In [3]:
# Count missing values per column
print(df.isnull().sum())

# Count rows that are exact duplicates of an earlier row
print(f"Number of duplicate rows: {df.duplicated().sum()}")

# Column names, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
df.info()


Unnamed: 0                  0
_id                         0
index                       0
name                        0
size                        0
type                        0
subtype                   258
alignment                   0
armor_class                 0
hit_points                  0
hit_dice                    0
speed                       0
strength                    0
dexterity                   0
constitution                0
intelligence                0
wisdom                      0
charisma                    0
proficiencies               0
damage_vulnerabilities      0
damage_resistances          0
damage_immunities           0
condition_immunities        0
senses                      0
languages                 129
challenge_rating            0
special_abilities          56
actions                     2
legendary_actions         293
url                         0
reactions                 311
other_speeds              318
dtype: int64
Number of duplicate rows: 0

In [5]:
# Build the cleaned frame in one chain so that no column is ever assigned on a
# frame derived from `df` (the pattern that triggers SettingWithCopyWarning):
#   1. drop rows missing 'type', 'challenge_rating' or 'size';
#   2. coerce 'challenge_rating' to numeric, turning unparseable entries into NaN;
#   3. drop the rows whose challenge rating became NaN in step 2;
#   4. copy, so later cells can add columns to df_cleaned safely.
df_cleaned = (
    df
    .dropna(subset=['type', 'challenge_rating', 'size'])
    .assign(
        challenge_rating=lambda frame: pd.to_numeric(frame['challenge_rating'], errors='coerce')
    )
    .dropna(subset=['challenge_rating'])
    .copy()
)

# Verify the data after cleaning
print(df_cleaned.head())


   Unnamed: 0                       _id               index  \
0           0  5f2329ad0b1bb138c59411ee             aboleth   
1           1  5f2329ad0b1bb138c59411ef             acolyte   
2           2  5f2329ad0b1bb138c59411f1  adult-black-dragon   
3           3  5f2329ad0b1bb138c59411f0   adult-blue-dragon   
4           4  5f2329ad0b1bb138c59411f2  adult-brass-dragon   

                 name    size        type   subtype      alignment  \
0             Aboleth   Large  aberration       NaN    lawful evil   
1             Acolyte  Medium    humanoid  any race  any alignment   
2  Adult Black Dragon    Huge      dragon       NaN   chaotic evil   
3   Adult Blue Dragon    Huge      dragon       NaN    lawful evil   
4  Adult Brass Dragon    Huge      dragon       NaN   chaotic good   

   armor_class  hit_points hit_dice  \
0           17         135    18d10   
1           10           9      2d8   
2           19         195    17d12   
3           19         225    18d12   
4    

In [9]:
# Number of rows for every observed (type, challenge_rating, size) combination,
# flattened back into ordinary columns with the tally stored under 'count'.
grouped_df = (
    df_cleaned
    .groupby(['type', 'challenge_rating', 'size'])
    .size()
    .reset_index(name='count')
)

# Preview the top of the aggregated table
print(grouped_df.head())


         type  challenge_rating    size  count
0  aberration               2.0  Medium      1
1  aberration               4.0   Large      1
2  aberration               5.0   Large      1
3  aberration               8.0   Large      1
4  aberration              10.0   Large      1


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score


In [11]:

# One row per observed (type, challenge_rating, size) combination, with the
# number of source rows in each combination kept in 'count'.
grouped_df = (
    df_cleaned
    .groupby(['type', 'challenge_rating', 'size'])
    .size()
    .reset_index(name='count')
)

# Model inputs: the 'type' and 'size' columns
X = grouped_df.loc[:, ['type', 'size']]
# Model target: the challenge rating (CR)
y = grouped_df.loc[:, 'challenge_rating']


In [12]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [14]:
# Preprocessing for the two features selected above.
# Both 'type' and 'size' hold category labels stored as strings (e.g. 'dragon',
# 'Huge'), so both are one-hot encoded. Passing the raw 'size' strings through
# unchanged would make LinearRegression fail when the pipeline is fitted.
# handle_unknown='ignore' encodes a category that was not seen during fit as
# all zeros instead of raising an error at predict time.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['type', 'size']),
    ])

# Create the pipeline: encode the features, then fit a linear regression
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])


In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Ordinal encoding of the 'size' labels (1 = Tiny ... 6 = Gargantuan) so that
# size can enter the regression as a single numeric feature.
size_mapping = {
    'Tiny': 1,
    'Small': 2,
    'Medium': 3,
    'Large': 4,
    'Huge': 5,
    'Gargantuan': 6
}

# Add the numeric size as a new column. assign() returns a new frame, so the
# cell can be re-run safely and never writes into a view of another frame.
df_cleaned = df_cleaned.assign(size_numeric=df_cleaned['size'].map(size_mapping))

# Labels missing from size_mapping map to NaN, and groupby drops NaN keys by
# default; report them rather than losing those rows without notice.
unmapped_sizes = df_cleaned.loc[df_cleaned['size_numeric'].isna(), 'size'].unique()
if len(unmapped_sizes) > 0:
    print(f'Warning: sizes without a numeric mapping (rows excluded): {list(unmapped_sizes)}')

# One row per observed (type, challenge_rating, size_numeric) combination
grouped_df = df_cleaned.groupby(['type', 'challenge_rating', 'size_numeric']).agg(
    count=('name', 'size')
).reset_index()

# Define features (X) and target (y)
X = grouped_df[['type', 'size_numeric']]  # Features: type and numeric size
y = grouped_df['challenge_rating']  # Target variable: Challenge Rating (CR)

# Split the data into training and testing sets (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing: one-hot encode 'type', pass the numeric size through as-is.
# handle_unknown='ignore' keeps predict() from raising when a type occurs only
# in the test split; such rows are encoded as all zeros for the type columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['type']),
        ('num', 'passthrough', ['size_numeric'])  # Use numeric values of 'size'
    ])

# Create the pipeline with preprocessing and regression
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Train the model
model.fit(X_train, y_train)

# Make predictions
# NOTE(review): an unconstrained linear model can return values below zero
# here; decide whether predictions should be clipped before they are used.
y_pred = model.predict(X_test)

# Evaluate the model on the held-out rows
print(f'Mean Squared Error: {mean_squared_error(y_test, y_pred)}')
print(f'R^2 Score: {r2_score(y_test, y_pred)}')

# Print the first few predictions to inspect the results
print(f'Some predictions: {y_pred[:10]}')



Mean Squared Error: 15.426364435086798
R^2 Score: 0.5433499314545404
Some predictions: [14.99151457  6.47439624  3.73267609 10.26273214 -0.78834816 10.34217602
  6.55384012  9.74111231  3.73267609 17.3177841 ]
