# Metal Earth Analysis

This notebook analyzes the overlap between Tolkien references and metal band names from Metal Archives.

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

# Set up plotting
# matplotlib deprecated the bare 'seaborn' style name in 3.6 in favour of
# 'seaborn-v0_8' and later removed it, so use the new name when this
# matplotlib provides it and fall back to the old one otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
plt.rcParams['figure.figsize'] = [12, 6]
sns.set_palette('deep')

ModuleNotFoundError: No module named 'pandas'

## Load and Clean Data

In [None]:
# Load the data
df = pd.read_csv('reports/metal_band_matches.csv')

# Create a matches-only dataframe: rows whose 'Band Name' is the
# 'No match found' placeholder are dropped. The copy lets later cells add
# columns without touching df.
matches_df = df[df['Band Name'] != 'No match found'].copy()

# Guard the ratio so a CSV with a header but no rows reports 0.00% instead of
# raising ZeroDivisionError.
match_rate = len(matches_df) / len(df) if len(df) else 0.0

print(f"Total terms checked: {len(df)}")
print(f"Terms with matches: {len(matches_df)}")
print(f"Match rate: {match_rate:.2%}")

## Most Popular References

In [None]:
# Tally how many matched rows each search term produced (most frequent first)
term_counts = matches_df['Search Name'].value_counts()

# Bar chart of the 20 most frequent terms
top_twenty = term_counts.head(20)
plt.figure(figsize=(15, 8))
top_twenty.plot.bar()
plt.title('Most Common Tolkien References Used by Metal Bands')
plt.xlabel('Reference')
plt.ylabel('Number of Bands')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()

# Text listing of the ten leaders alongside their counts
print("\nTop 10 Most Used References:")
top_ten = term_counts.head(10)
for reference, n_bands in top_ten.items():
    print(f"{reference}: {n_bands} bands")

## Reference Categories

Let's categorize some of the major references we see in the data.

In [None]:
# Define categories (based on the data we can see)
# Dict order matters: categorize_term returns the first category with a
# matching keyword, so 'Black Sword' lands in 'Artifacts' before the generic
# 'Black' keyword in 'Concepts' is ever considered.
categories = {
    'Locations': ['Minas', 'Mordor', 'Gondor', 'Mirkwood', 'Angband', 'Gorgoroth'],
    'Characters': ['Gothmog', 'Gorthaur', 'Azog', 'Maeglin', 'Maedhros', 'Luthien'],
    'Artifacts': ['Aeglos', 'Great Horn', 'Black Sword'],
    'Creatures': ['Maiar', 'Balrog', 'Dragon', 'Warg'],
    'Concepts': ['Evil', 'Dark', 'Black', 'Death']
}

def categorize_term(term):
    """Return the first category whose keyword occurs in *term*.

    Matching is a case-insensitive substring test, tried in the insertion
    order of ``categories``. A term with no keyword hit, or a value that is
    not a string (such as the NaN pandas uses for an empty CSV cell), is
    reported as 'Other'.
    """
    if not isinstance(term, str):
        # Non-string values have no .lower(); treat them as uncategorized
        # instead of raising AttributeError in the middle of .apply().
        return 'Other'

    # Lowercase once rather than once per keyword.
    lowered = term.lower()
    for category, terms in categories.items():
        if any(ref.lower() in lowered for ref in terms):
            return category
    return 'Other'

# Tag every matched row with the category of its search term
search_terms = matches_df['Search Name']
matches_df['Category'] = search_terms.apply(categorize_term)

# Pie chart showing each category's share of the matched rows
category_counts = matches_df['Category'].value_counts()
plt.figure(figsize=(10, 10))
category_counts.plot.pie(autopct='%1.1f%%')
plt.title('Distribution of Tolkien Reference Categories in Metal Band Names')

## Name Length Analysis

In [None]:
# Character count of each matched search term
matches_df['Term Length'] = matches_df['Search Name'].str.len()

# Histogram of those lengths, labelled through the returned Axes
plt.figure(figsize=(12, 6))
length_ax = sns.histplot(data=matches_df, x='Term Length', bins=30)
length_ax.set_title('Distribution of Tolkien Reference Lengths in Band Names')
length_ax.set_xlabel('Length of Term')
length_ax.set_ylabel('Count')

# Summary statistics for the same column
length_summary = matches_df['Term Length'].describe()
print("\nReference Length Statistics:")
print(length_summary)

## Multiple Band Analysis

This section looks at terms that have been used by more than one band.

In [None]:
# Count rows per search term, then keep only terms that occur more than once
usage_counts = matches_df['Search Name'].value_counts()
multiple_uses = usage_counts[usage_counts > 1]

reused_total = len(multiple_uses)
print(f"Number of terms used by multiple bands: {reused_total}")
print("\nMost reused terms:")
print(multiple_uses.head(10))

# Histogram of how many bands share each reused term
reuse_values = multiple_uses.to_numpy()
plt.figure(figsize=(10, 6))
sns.histplot(data=reuse_values, bins=20)
plt.title('Distribution of Band Name Reuse')
plt.xlabel('Number of Bands Using the Term')
plt.ylabel('Count of Terms')

## Key Findings

1. Most commonly used references:
   - [List top findings]

2. Category distribution:
   - [List category insights]

3. Name length patterns:
   - [List length patterns]

4. Multiple usage patterns:
   - [List reuse patterns]

## Future Analysis Ideas

1. Cross-reference with band genres (would need additional data)
2. Geographic analysis of bands using Tolkien references
3. Timeline analysis of when these bands were formed
4. Analysis of band name modifications (e.g., adding prefixes/suffixes to Tolkien terms)
5. Network analysis of related terms