In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Load the band table (first CSV column becomes the index) and order the rows by 'date'.
bands = (
    pd.read_csv('data/ma_bands_data.csv', index_col=0)
    .sort_values(by='date')
)

In [2]:
# Show the loaded table; after the sort by 'date', rows with a missing 'date' appear at the end.
bands

Unnamed: 0,name,url,genre,theme,label,country,location,status,date,years
84494,Olympic,https://www.metal-archives.com/bands/Olympic/3...,Pop Rock; Heavy Metal/Hard Rock (mid),,,Czechia,Prague,Active,1962.0,1962-present
85413,Orient,https://www.metal-archives.com/bands/Orient/51156,Hard Rock (early); Heavy Metal (later),"['Sex', ' Evil']",Unsigned/independent,Czechia,Prague,Active,1964.0,"1964-1990,1998-present"
98592,Scorpions,https://www.metal-archives.com/bands/Scorpions...,Heavy Metal/Hard Rock,"['Life', ' Society', ' Love', ' Sex', ' Inner ...",,Germany,"Hannover, Lower Saxony",Active,1964.0,1964-present
43631,Faithful Breath,https://www.metal-archives.com/bands/Faithful_...,Progressive Rock (early); Heavy Metal/Hard Roc...,"['Adventure', ' Fantasy', ' Women']",,Germany,"Witten, North Rhine-Westphalia",Changed name,1967.0,"1966-1967 (as),1967-1986,1986-1993 (as)"
67636,Limelight,https://www.metal-archives.com/bands/Limelight...,NWOBHM/Heavy Metal/Hard Rock,"['Life', ' Feelings', ' Love', ' Thoughts']",,United Kingdom,"Mansfield, Nottinghamshire, England",Split-up,1967.0,"1967-?,1997"
...,...,...,...,...,...,...,...,...,...,...
129176,כף הקלע,https://www.metal-archives.com/bands/%D7%9B%D7...,Black/Death Metal/Crust Punk,,Unsigned/independent,Israel,"Haifa, Haifa",Active,,
129197,ᛋᚢᚱᛏᚱ ᚹᛟᚢᚾᛞᛋ,https://www.metal-archives.com/bands/%E1%9B%8B...,"Raw Black Metal, Dark Ambient",,,United Kingdom,,Active,,
129208,こくまろみるく,https://www.metal-archives.com/bands/%E3%81%93...,Experimental Power Metal,"['The bizarre', ' Macabre']",Unsigned/independent,Japan,,Active,,
129224,ノストラダムス,https://www.metal-archives.com/bands/%E3%83%8E...,Progressive Metal,,,Japan,,Split-up,,


# 1. By Genre

In [3]:
# Peek at bands whose genre mentions "eastern" (case-insensitive via .str.lower()).
# na=False treats a missing genre as "no match"; without it a NaN in the mask would make
# the boolean indexing raise.
bands[bands['genre'].str.lower().str.contains('eastern', na=False)].head()

Unnamed: 0,name,url,genre,theme,label,country,location,status,date,years
87513,Pentagram,https://www.metal-archives.com/bands/Pentagram...,Thrash Metal (early); Heavy/Middle Eastern Fol...,"['Metal', ' War', ' Misery (early); Suicide', ...",,Türkiye,Istanbul,Active,1987.0,"1987-present,2001-present (as)"
85566,Orphaned Land,https://www.metal-archives.com/bands/Orphaned_...,Doom/Death/Middle Eastern Folk Metal (early); ...,"['Abrahamic religions', ' Unity', ' Peace']",,Israel,"Bat Yam, Tel Aviv / Petah Tikva, Central",Active,1992.0,"1991-1992 (as),1992-1997,2001-present"
72410,Melechesh,https://www.metal-archives.com/bands/Melechesh...,Black/Death/Middle Eastern Folk Metal,['Mesopotamian mythology'],,International,Jerusalem/Bethlehem (early); Netherlands/Franc...,Active,1993.0,1993-present
14215,Blaakyum,https://www.metal-archives.com/bands/Blaakyum/...,Thrash/Heavy/Middle Eastern Folk Metal,"['Conceptual (thematic)', ' Symbolic', ' Socia...",Unsigned/independent,Lebanon,"Beirut / Antelias, Matn",Active,1995.0,"1995-2001,2007-present"
34862,Distorted,https://www.metal-archives.com/bands/Distorted...,Gothic/Melodic Death Metal with Middle Eastern...,"['Depression', ' Romance', ' Hypocrisy']",,Israel,"Bat Yam, Tel Aviv",Active,1996.0,"1996-2010,2022-present"


In [4]:
from sklearn.feature_extraction.text import CountVectorizer

# Binary bag-of-words over the genre strings: one 0/1 column per genre keyword.
# - token_pattern is a raw string so "\-" reaches the regex engine as written rather than
#   being an invalid string escape (which triggers a warning); the regex itself is unchanged.
#   "middle eastern" comes first in the alternation so it is kept as one two-word token.
# - min_df=10 drops tokens that appear in fewer than 10 genre strings.
# - stop_words removes filler words that carry no genre information.
vectorizer = CountVectorizer(
    binary=True,
    token_pattern=r"middle eastern|[0-9a-z'\-]+",
    min_df=10,
    stop_words=['metal', 'rock', "'n'", 'roll', 'early', 'mid', 'later',
                'and', 'elements', 'influences', 'various', 'with'],
)
X = vectorizer.fit_transform(bands['genre'])
print(vectorizer.get_feature_names_out())

['acoustic' 'alternative' 'ambient' 'aor' 'atmospheric' 'avant-garde'
 'black' 'blackened' 'blues' 'brutal' 'celtic' 'classical' 'crossover'
 'crust' 'crustcore' 'cybergrind' 'd-beat' 'dark' 'darkwave' 'death'
 'deathcore' 'depressive' 'djent' 'doom' 'drone' 'dungeon' 'electronic'
 'electronica' 'epic' 'experimental' 'extreme' 'folk' 'funeral' 'funk'
 'fusion' 'glam' 'goregrind' 'gothic' 'grind' 'grindcore' 'groove'
 'grunge' 'hard' 'hardcore' 'heavy' 'industrial' 'jazz' 'mathcore'
 'medieval' 'melodic' 'metalcore' 'middle eastern' 'neoclassical'
 'neofolk' 'noise' 'noisecore' 'noisegrind' 'nu-metal' 'nwobhm' 'oi'
 'operatic' 'pagan' 'pop' 'post-black' 'post-hardcore' 'post-metal'
 'post-punk' 'post-rock' 'power' 'powerviolence' 'progressive'
 'psychedelic' 'punk' 'rac' 'raw' 'shoegaze' 'shred' 'slam' 'sludge'
 'southern' 'speed' 'stoner' 'symphonic' 'synth' 'technical' 'thrash'
 'viking']


In [5]:
from sklearn.cluster import KMeans
# Cluster the binary genre vectors into 30 groups, keeping the best of 50 initialisations.
# random_state pins the centroid initialisation so cluster ids and sizes are the same on
# every re-run of the notebook.
kmeans = KMeans(n_clusters=30, n_init=50, random_state=0)
kmeans.fit(X)
kmeans.labels_

array([13, 13, 13, ..., 18,  7, 10], dtype=int32)

In [6]:
# Bucket the band rows by their genre-cluster label (labels align with rows by position).
groups = bands.groupby(kmeans.labels_)

In [7]:
# Number of bands assigned to each genre cluster.
groups.size()

0      1717
1     23692
2     14431
3     11471
4      2304
5      6185
6      6982
7      5465
8      1343
9      2405
10     1325
11    10241
12     5339
13     2553
14     1270
15     1369
16     4045
17     1377
18     3292
19     4175
20     1560
21      899
22     1602
23     3606
24      337
25     1532
26     1647
27     1945
28     1996
29     3366
dtype: int64

In [8]:
# Collect the band names of every genre cluster into one list per cluster.
groups['name'].agg(list)

0     [Pantera, Kreon, Spectrus, White Zombie, Battl...
1     [Evil Blood, Skull Face, White Hell, Cerbero, ...
2     [Graf Spee, Necrophagia, Imperator, Аясын Салх...
3     [Zoetrope, IG-Elit, Blind Illusion, Stone Veng...
4     [Trouble, Warhead, Blaster, Valhall, Angel Rot...
5     [Vulcano, Parabellum, Toxodeth, Virgin Witch, ...
6     [Grinder Heavy Metal, Poison, Master, Mantas, ...
7     [Rush, Night Sun, Yargos, Ñu, Annex, Graveston...
8     [In Flames, EOS, Don't Deashcore aus Seesen am...
9     [El Reloj, Christopher Lee, Fongus, Riot V, Ci...
10    [Corrosion of Conformity, Melvins, Neurosis, K...
11    [Black Sabbath, Judas Priest, Back Out, La Ban...
12    [Rax, Carcass, Alucard, Misanthrope, Threnody,...
13    [Olympic, Orient, Scorpions, Faithful Breath, ...
14    [Graveyard Rodeo, Eyehategod, Sludgelord, Will...
15    [Ragnarok, Corvus Corax, Steve Von Till, Raven...
16    [Die Krupps, Ministry, Faith No More, Fall of ...
17    [Death, Disharmonic Orchestra, Thormenthor

In [9]:
# Centroids: one row per cluster, one column per vocabulary term. Because the features are
# 0/1, an entry is roughly the share of the cluster's bands whose genre contains that term.
kmeans.cluster_centers_

array([[5.82411182e-04, 1.92195690e-02, 0.00000000e+00, ...,
        1.10658125e-02, 0.00000000e+00, 0.00000000e+00],
       [6.75333446e-04, 4.22083404e-05, 4.85395914e-02, ...,
        2.95458383e-04, 0.00000000e+00, 5.31825089e-03],
       [0.00000000e+00, 7.62247938e-04, 6.23657404e-04, ...,
        5.75150717e-02, 0.00000000e+00, 1.10872427e-03],
       ...,
       [0.00000000e+00, 3.59897172e-03, 5.65552699e-03, ...,
        1.02827763e-03, 4.11311054e-03, 0.00000000e+00],
       [0.00000000e+00, 4.00801603e-03, 0.00000000e+00, ...,
        5.01002004e-04, 1.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 5.94177065e-04, 2.97088532e-04, ...,
        2.10932858e-02, 0.00000000e+00, 2.97088532e-04]])

In [10]:
# Column indices of the five largest centroid weights per cluster, strongest first,
# then mapped to the corresponding vocabulary terms.
top_pos = np.argsort(kmeans.cluster_centers_, axis=1)[:, :-6:-1]
np.take(vectorizer.get_feature_names_out(), top_pos)

array([['groove', 'heavy', 'southern', 'melodic', 'sludge'],
       ['black', 'raw', 'atmospheric', 'ambient', 'melodic'],
       ['death', 'technical', 'groove', 'blackened', 'deathcore'],
       ['thrash', 'speed', 'power', 'progressive', 'metalcore'],
       ['stoner', 'doom', 'sludge', 'psychedelic', 'southern'],
       ['death', 'black', 'melodic', 'thrash', 'symphonic'],
       ['death', 'thrash', 'melodic', 'groove', 'progressive'],
       ['progressive', 'power', 'heavy', 'symphonic', 'melodic'],
       ['melodic', 'metalcore', 'death', 'thrash', 'groove'],
       ['heavy', 'power', 'progressive', 'thrash', 'melodic'],
       ['sludge', 'stoner', 'post-metal', 'atmospheric', 'hardcore'],
       ['heavy', 'melodic', 'doom', 'nwobhm', 'stoner'],
       ['melodic', 'death', 'progressive', 'doom', 'groove'],
       ['hard', 'heavy', 'melodic', 'nwobhm', 'progressive'],
       ['doom', 'sludge', 'drone', 'death', 'post-metal'],
       ['folk', 'black', 'pagan', 'viking', 'symphonic'

In [11]:
# The five largest centroid weights per cluster (descending), rounded for display.
top = np.sort(kmeans.cluster_centers_, axis=1)[:, :-6:-1]
top.round(2)

array([[1.  , 0.17, 0.08, 0.06, 0.05],
       [1.  , 0.05, 0.05, 0.05, 0.04],
       [1.  , 0.06, 0.04, 0.03, 0.02],
       [1.  , 0.07, 0.06, 0.04, 0.03],
       [1.  , 0.66, 0.1 , 0.07, 0.04],
       [1.  , 1.  , 0.14, 0.09, 0.04],
       [1.  , 1.  , 0.11, 0.05, 0.03],
       [1.  , 0.16, 0.1 , 0.04, 0.04],
       [1.  , 1.  , 0.95, 0.08, 0.04],
       [1.  , 1.  , 0.09, 0.05, 0.05],
       [1.  , 0.3 , 0.18, 0.1 , 0.06],
       [1.  , 0.07, 0.03, 0.03, 0.01],
       [1.  , 1.  , 0.06, 0.06, 0.04],
       [1.  , 0.99, 0.12, 0.03, 0.03],
       [1.  , 1.  , 0.09, 0.06, 0.05],
       [1.  , 0.42, 0.12, 0.08, 0.07],
       [0.34, 0.24, 0.18, 0.13, 0.04],
       [1.  , 1.  , 0.11, 0.05, 0.05],
       [1.  , 0.16, 0.15, 0.1 , 0.03],
       [1.  , 1.  , 0.16, 0.09, 0.07],
       [1.  , 0.3 , 0.05, 0.03, 0.03],
       [1.  , 0.68, 0.08, 0.08, 0.03],
       [1.  , 0.95, 0.13, 0.09, 0.02],
       [1.  , 0.28, 0.18, 0.13, 0.06],
       [1.  , 0.08, 0.01, 0.01, 0.01],
       [1.  , 1.  , 0.06,

# 2. By Theme

In [12]:
# Peek at bands whose theme text contains an "(early)" style qualifier.
# na=False yields a clean boolean mask for bands with no theme, instead of filling NaN
# afterwards on an object-dtype result (which relies on deprecated silent downcasting).
bands[bands['theme'].str.lower().str.contains('early', na=False)].head()

Unnamed: 0,name,url,genre,theme,label,country,location,status,date,years
102499,Sir Lord Baltimore,https://www.metal-archives.com/bands/Sir_Lord_...,Heavy Metal/Hard Rock,"['Love', ' Women', ' Life (early); Christianit...",Unsigned/independent,United States,"New York, New York",Split-up,1968.0,"1968-1972,1977,2005-2006"
91937,Rainbow,https://www.metal-archives.com/bands/Rainbow/108,Hard Rock/Heavy Metal,"['Fantasy', ' Rock (early); Relationships', ' ...",,United States,"Los Angeles, California / Connecticut / New York",Active,1975.0,"1975 (as),1976-1984,1994-1997,2015-present"
33522,Diamond Head,https://www.metal-archives.com/bands/Diamond_H...,"NWOBHM, Heavy Metal/Hard Rock","['Fantasy', ' Fiction', ' Love (early); Societ...",,United Kingdom,"Stourbridge, West Midlands, England",Active,1976.0,"1976-1985,1991-1994,2000-present"
119193,V8,https://www.metal-archives.com/bands/V8/3236,Heavy Metal,"['Violence', ' Rebellion', ' Religion', ' Soci...",,Argentina,Buenos Aires,Split-up,1979.0,"1979-1987,1996"
81146,Nightmare,https://www.metal-archives.com/bands/Nightmare...,Heavy/Power Metal,"['Life', ' Rock (early); Fantasy', ' Armageddo...",,France,"Grenoble, Auvergne-Rhône-Alpes",Active,1979.0,"1979-1988,1999-present"


In [13]:
from sklearn.feature_extraction.text import CountVectorizer

# Binary bag-of-words over the theme strings (missing themes become empty documents).
# NOTE: this rebinds `vectorizer` and `X` from the genre section.
# - token_pattern is a raw string so "\-" and "\s" reach the regex engine as written rather
#   than being invalid string escapes (which trigger a warning); the regex is unchanged.
#   A token starts and ends with an alphanumeric and may contain spaces, quotes, hyphens and
#   slashes in between, so multi-word themes such as "social issues" stay one token.
# - min_df=10 drops themes used by fewer than 10 bands.
# - stop_words removes the period qualifiers "early" / "mid" / "later".
vectorizer = CountVectorizer(
    binary=True,
    token_pattern=r"[0-9a-z][0-9a-z'\-/\s]*[0-9a-z]",
    min_df=10,
    stop_words=['early', 'mid', 'later'],
)
X = vectorizer.fit_transform(bands['theme'].fillna(''))
print(vectorizer.get_feature_names_out().shape)

(1116,)


In [14]:
from sklearn.cluster import KMeans
# Cluster the binary theme vectors into 30 groups, keeping the best of 30 initialisations.
# random_state pins the centroid initialisation so cluster ids and sizes are the same on
# every re-run of the notebook.
kmeans = KMeans(n_clusters=30, n_init=30, random_state=0)
kmeans.fit(X)
kmeans.labels_

array([ 1, 12,  7, ...,  1,  1,  1], dtype=int32)

In [15]:
# Bucket the band rows by their theme-cluster label (labels align with rows by position).
groups = bands.groupby(kmeans.labels_)

In [16]:
# Number of bands assigned to each theme cluster.
groups.size()

0        15
1     78097
2      1754
3      2126
4      2141
5      1447
6      3681
7      1875
8      1015
9      1643
10     1672
11     2872
12      911
13      888
14     2168
15     2711
16     1704
17     1716
18     1744
19     1662
20     1743
21      963
22      692
23     3070
24     5839
25     1931
26     1068
27      817
28      677
29      829
dtype: int64

In [17]:
# Collect the band names of every theme cluster into one list per cluster.
groups['name'].agg(list)

0     [Kilcrops, Gorath, Abominor, Ambiguitas, Mitoc...
1     [Olympic, Cool Feet, Ash, Flower Travellin' Ba...
2     [Saint Vitus, Coroner, Osiris, Disgrace, Дай, ...
3     [Ritual, Törr, Oz, Satan's Host, Pagan Altar, ...
4     [Glory Bell's Band, The Clavion, Tool Silence,...
5     [Rush, Malhavoc, Armia, Martin, Magellan, Meko...
6     [Black Uniforms, The Accüsed, Mantas, Decapita...
7     [Scorpions, Deep Purple, Budgie, El Reloj, Luc...
8     [Judas Priest, Christopher Lee, Iron Maiden, K...
9     [Graf Spee, Metallian, Black Ivory, Minotaur, ...
10    [Made in England, Medallion, Atsushi Yokozeki,...
11    [Crucifixion, Sangre Humana, Midnight Darkness...
12    [Orient, Buffalo, Dr. Heathen Scum, The Handso...
13    [Quartz, Death SS, Tyrant, Venom, Voodoo Child...
14    [Limelight, Sir Lord Baltimore, Thin Lizzy, Ca...
15    [Faithful Breath, Black Sabbath, Cirith Ungol,...
16    [Bulldozer, Exciter, Overkill, Grave Digger, S...
17    [Trust, The Exploited, Hades, Amebix, D.D.

In [18]:
# Column indices of the five largest centroid weights per theme cluster, strongest first,
# then mapped to the corresponding theme terms.
top_pos = np.argsort(kmeans.cluster_centers_, axis=1)[:, :-6:-1]
np.take(vectorizer.get_feature_names_out(), top_pos)

array([['gnosis', 'death', 'occultism', 'satanism', 'chaos'],
       ['christianity', 'horror', 'love', 'social issues',
        'science fiction'],
       ['depression', 'suicide', 'death', 'nature', 'hate'],
       ['occultism', 'satanism', 'darkness', 'death', 'evil'],
       ['nature', 'paganism', 'mythology', 'mysticism',
        'national socialism'],
       ['philosophy', 'death', 'nature', 'life', 'occultism'],
       ['gore', 'death', 'violence', 'horror', 'murder'],
       ['society', 'life', 'death', 'inner struggles', 'religion'],
       ['history', 'war', 'mythology', 'fantasy', 'nature'],
       ['death', 'darkness', 'evil', 'satanism', 'war'],
       ['instrumental', 'christianity', 'nature', 'depression', 'sorrow'],
       ['darkness', 'evil', 'satanism', 'nature', 'war'],
       ['sex', 'gore', 'drugs', 'violence', 'alcohol'],
       ['satan', 'death', 'evil', 'darkness', 'anti-christianity'],
       ['life', 'love', 'feelings', 'metal', 'emotions'],
       ['fantasy',

In [19]:
# The five largest centroid weights per theme cluster (descending), rounded for display.
top = np.sort(kmeans.cluster_centers_, axis=1)[:, :-6:-1]
top.round(2)

array([[1.  , 0.33, 0.27, 0.2 , 0.13],
       [0.02, 0.01, 0.01, 0.01, 0.01],
       [1.  , 0.28, 0.23, 0.09, 0.07],
       [1.  , 0.26, 0.17, 0.15, 0.05],
       [0.74, 0.42, 0.05, 0.04, 0.04],
       [1.  , 0.09, 0.09, 0.06, 0.05],
       [1.  , 0.42, 0.12, 0.1 , 0.08],
       [1.  , 0.21, 0.15, 0.06, 0.06],
       [1.  , 0.28, 0.17, 0.08, 0.08],
       [1.  , 1.  , 0.11, 0.11, 0.07],
       [1.  , 0.  , 0.  , 0.  , 0.  ],
       [1.  , 0.15, 0.12, 0.08, 0.07],
       [1.  , 0.21, 0.14, 0.14, 0.13],
       [1.  , 0.26, 0.17, 0.14, 0.13],
       [1.  , 0.15, 0.05, 0.04, 0.04],
       [1.  , 0.1 , 0.06, 0.05, 0.05],
       [1.  , 1.  , 0.1 , 0.07, 0.06],
       [1.  , 0.28, 0.19, 0.14, 0.08],
       [1.  , 0.12, 0.11, 0.05, 0.05],
       [1.  , 0.18, 0.11, 0.1 , 0.09],
       [1.  , 0.36, 0.15, 0.15, 0.07],
       [1.  , 0.53, 0.35, 0.08, 0.07],
       [1.  , 0.32, 0.2 , 0.1 , 0.06],
       [1.  , 0.09, 0.07, 0.06, 0.05],
       [1.  , 0.08, 0.06, 0.05, 0.05],
       [1.  , 0.37, 0.09,

# 3. By Year

In [20]:
from sklearn.cluster import KMeans
# Cluster the bands on the single 'date' column, skipping rows where it is missing.
# bands[['date']] keeps the data two-dimensional (n_samples x 1), as KMeans requires.
# random_state pins the centroid initialisation so cluster ids and sizes are the same on
# every re-run of the notebook.
kmeans = KMeans(n_clusters=30, n_init=50, random_state=0)
kmeans.fit(bands[['date']].dropna())
kmeans.labels_

array([11, 11, 11, ..., 18, 18, 18], dtype=int32)

In [21]:
# Only rows with a 'date' were clustered, so apply the same filter before grouping to keep
# the labels aligned with the rows by position.
groups = bands.loc[bands['date'].notna()].groupby(kmeans.labels_)

In [22]:
# Number of bands assigned to each 'date' cluster.
groups.size()

0     3625
1     5159
2     6242
3     3816
4     5969
5     5347
6     2262
7     4984
8     5078
9     4141
10    5689
11     290
12    3074
13    6685
14    5700
15    4353
16    4783
17    1414
18    1617
19    5348
20    2346
21    5851
22    5032
23    4167
24    4730
25    3828
26    4443
27    3596
28    5209
29    3537
dtype: int64

In [23]:
# Collect the band names of every 'date' cluster into one list per cluster.
groups['name'].agg(list)

0     [Inertial Mass, Crypt, Infra Red, Sound Mind, ...
1     [Depurity, Derkaizer, Angel's Storm, From Hell...
2     [No Other God, No/Más, NightWraith, Veterans, ...
3     [Ravensblood, Inner Void, Leech 54, Prophecy, ...
4     [Megilloth, Mortal Enemy, Blasfema, Wings of M...
5     [Paroxysm, Invicticus, Castle in the Air, Dreg...
6     [Steel Prophet, Medallion, Hammers Rule, Anvil...
7     [Mismatched, Myrkgand, Evisceration, EvilRock,...
8     [Master Spy, Narcömancer, Umbrivago Sombrio, U...
9     [High n' Heavy, Stink Tank, Thousand Arrows, H...
10    [Unhale, Imperium, Dark O Malex, Ultor, Dark P...
11    [Olympic, Orient, Scorpions, Faithful Breath, ...
12    [Anticristo, Naked Truth, Digital Ruin, Wendig...
13    [Broken Glass, Sxuperion, Luna ad Noctum, Burn...
14    [Oxidized Faith, Blood Harvest, Assaulter, Nec...
15    [Groan, Noise Machine, Cadaveric Curse, Faithf...
16    [Terminal Grip, Persecution, Cerebral Haemorrh...
17    [Vortex, Crisis, Sacred Blade, Cutty Sark,

In [24]:
# Centroids: one value per cluster on the 'date' axis (the clustering input has a single column).
kmeans.cluster_centers_

array([[1990.51806897],
       [2009.        ],
       [2017.46859981],
       [2001.        ],
       [2005.        ],
       [1996.51374603],
       [1984.12776304],
       [2012.        ],
       [2019.48897204],
       [2014.        ],
       [2004.        ],
       [1975.74482759],
       [1988.55367599],
       [1998.52640239],
       [2007.        ],
       [1992.5090742 ],
       [1994.52101192],
       [1980.77369165],
       [2021.21706865],
       [2008.        ],
       [1986.54731458],
       [2006.        ],
       [2010.        ],
       [2002.        ],
       [2011.        ],
       [2015.        ],
       [2013.        ],
       [2016.        ],
       [2003.        ],
       [2000.        ]])

In [30]:
# Look up one band by exact name.
bands.loc[bands['name'].eq('Hatred Sculpted Souls')]

Unnamed: 0,name,url,genre,theme,label,country,location,status,date,years
52871,Hatred Sculpted Souls,https://www.metal-archives.com/bands/Hatred_Sc...,Death Metal,,Unsigned/independent,Brazil,"Carlos Barbosa, Rio Grande do Sul",Active,2013.0,2013-present
