In [1]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import networkx as nx
import plotly.express as px
import numpy as np
import plotly.io as pio
pio.renderers.default = "browser"


pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


# CSV einlesen (Passe den Pfad an)
df = pd.read_csv("Downloads/Battle DB 23 06 2024 - battle_events(2).csv")

In [3]:
# Namen in neue Spalte kopieren für Ansetzungsform > norm_name

df["norm_name #1"] = df["Name #1"]
df["norm_name #2"] = df["Name #2"]

In [5]:
# Ansetzungsform bereinigen, Groß-Kleinschreibung

df["norm_name #1"] = df["norm_name #1"].str.title()
df["norm_name #2"] = df["norm_name #2"].str.title()
df["hidden"] = df["hidden"].str.title()

In [7]:
# Ansetzungsform bereinigen, Klammern entfernen

df["norm_name #1"] = df["norm_name #1"].str.replace(r"\(.*\)", "", regex=True).str.strip()
df["norm_name #2"] = df["norm_name #2"].str.replace(r"\(.*\)", "", regex=True).str.strip()

In [9]:
# einzelne Spalten individuell anpassen

df.loc[1037, ["Name #1", "hidden"]] = ["Ssynic", "BMCL Titelmatch"]
df.loc[1255, ["Name #1", "hidden"]] = ["Besser", "BMCL meets DLTLLY Rap Battle"]
df.loc[688, ["hidden"]] = ["Merlin abwesend, Craze kickt seine Parts alleine"]
df.loc[198, ["Name #2", "hidden"]] = ["Shizu", "Deelah abwesend, Shizu springt ein"]
df.loc[440, ["norm_name #2", "Location", "hidden"]] = ["Jollyjay", "Kontext", "Der Sir abwesend, JollyJay springt ein"]
df.loc[966, ["Name #1", "Name #2", "hidden"]] = ["Ekhead & CPE", "Brian Damage & Karma", "Sport Rap-Battle, DLTLLY & SPOX: Borussia Dortmund vs. FC Bayern München"]

In [11]:
# Freestyle-Battles, Ankündigungen, Interviews, 2on2-Battles rausfiltern > df_battles

df_battles = df[~df["hidden"].str.contains('Interview|Ankündigung|Freestyle|Teaser|Provokation', na=False)]
df_battles = df_battles[~df_battles["Name #1"].str.contains(r'&|Freestyle|Ankündigung|PPV|\+', na=False)]
df_battles = df_battles[~df_battles["Name #2"].str.contains(r'&|Freestyle|Ankündigung|PPV|\+', na=False)]

# unten ermittelte Dubletten entfernen
df_battles = df_battles.drop([228, 451])

In [13]:
# Aliase auflisten für norm_name-Spalten

aliases = {
    "Triplebeat": "Triple",
    "Triple Beat": "Triple",
    "TRIPLE": "Triple",  
    
    ".Tecey": "Tecey",
    
    "Alice": "Atropatrap",
    "Atzenkalle -": "Atzenkalle",

    "Bahamakarma": "Karma",
    "B-Dad": "Bdad",
    "Andres Kaliba": "Bilal",
    "Bong Teggy - Rematch": "Bong Teggy",
    "Bronko Banano Aka Street": "Bronko Banano",
    "Street": "Bronko Banano",
    
    "Cashus K.": "Cashus K",
    "Capco Jonnes": "Capco Jones",
    "Capcojones": "Capco Jones",
    "Cemo": "Cemo Dafuq",
    "Change": "Change19",
    "Cleptomatic": "Clep",
    "Clepto": "Clep",
    "Craze' Parts": "Craze",

    "D.E.Z.": "D.E.Z",
    "Der Fischer -": "Der Fischer",
    "Desperados": "Despo",

    "Ein Letztes Mal: Yarambo": "Yarambo",

    "Falk": "Falki",
    "Fatcap -": "Fatcap",
    "Fate One": "Fate",
    "Fate1": "Fate",
    "Four Seven": "Fourseven",
    "Fraxone": "Frax One",

    "Gier -": "Gier",

    "Hansen - Rematch": "Hansen",
    "Herrn Sören": "Herr Sören",

    "Jack Dragon - Rematch!": "Jack Dragon",
    "Jean Gough Frais": "Jean-Gough Frais",
    "Jizi": "Ji-Zi",
    "Juse Ju -": "Juse Ju",

    "Khacoby": "Karkobi",
    "King Lil&#39;C'": "King Lil'C",
    "Kol&#39;ja": "Kol'ja",

    "Lavas": "Lavaş",

    "Main Moe -": "Main Moe",
    "Malik N.": "Malik",
    "Mars": "Mars B.",
    "Mighty P.": "Mighty P",
    "Mighty Mo": "Meidi",

    "Nichtdeintyp": "Nicht Dein Typ",

    "O&#39;Spella": "O'Spella",
    
    "P-Zak -": "P-Zak",
    "Pueblo Escobar": "Pablo",
    "Papi Schlauch -": "Papi Schlauch",
    "Phillie": "Phillie.45",
    "Presto -": "Presto",
    "Pretty Shitty Jiggy": "PrettyShittyJiggy",
    "Prettyshittyjiggy": "Pretty Shitty Jiggy",

    "Quasi": "Quasi Infamous",

    "Ryko J": "Ryko-J",

    "Schlawinor": "Schlawin0r",
    "Schlawin0R": "Schlawin0r",
    "Snakeheadrhyme": "SnakeHeadRhyme",

    "Tableddn Timmy": "TableddnTimmy",
    "Tableddntimmy": "TableddnTimmy",
    "Tierstar -": "Tierstar",
    "Tightammic -": "Tightammic",

    "Mr. Whyte": "Yaamann",
    "Yamann": "Yaamann",
    "Mr. White": "Yaamann",
    "Mr White": "Yaamann"
}

df_battles["norm_name #1"] = df_battles["norm_name #1"].replace(aliases)
df_battles["norm_name #2"] = df_battles["norm_name #2"].replace(aliases)

In [15]:
# Battlerapper mit nur 2 Battles (beide Contender) rausfiltern > df_battles_clean

# Alle Namen aus beiden Spalten zählen
all_names = pd.concat([df_battles["norm_name #1"], df_battles["norm_name #2"]])
counts = all_names.value_counts()

# Bedingung: mindestens einer der beiden Namen kommt mehr als einmal vor
mask = df_battles.apply(lambda row: counts[row["norm_name #1"]] > 3 or counts[row["norm_name #2"]] > 3, axis=1)

# Gefiltertes DataFrame
df_battles_clean = df_battles[mask]

In [17]:
# Grafik

# Datenzugriff für die Grafik

battles = []
for _, row in df_battles_clean.iterrows():
     battles.append((row['norm_name #1'], row['norm_name #2'], int(row['Year']), row['Name #1'], row['Name #2'], row['Stadt'], row.get('Location', '')))

# Titelträger - für andere Darstellung auf Karte

champs = ["Nedal Nib", "Yarambo", "Mikesh", "Kato"]


# Graph aufbauen
G = nx.Graph()
for nn1, nn2, jahr, n1, n2, ort, location in battles:
    battle_info = f"{n1} vs. {n2}, {jahr}, {location}, {ort}"
    G.add_node(nn1)    # benennung der Punkte
    G.add_node(nn2)
    if G.has_edge(nn1, nn2):
        G[nn1][nn2]['info'].append(battle_info)
        G[nn1][nn2]['years'].append(jahr)
    else:
        G.add_edge(nn1, nn2, info=[battle_info], years=[jahr])

pos = nx.spring_layout(G, seed=42)

colorscale = px.colors.sequential.Turbo  # sehr bunte, breite Farbpalette

def interpolate_color(norm):
    # norm 0..1
    idx = int(norm * (len(colorscale)-1))
    return colorscale[idx]

# Jahreswerte für Normierung
years_all = [jahr for _, _, jahr, _, _, _, _ in battles]
min_year, max_year = min(years_all), max(years_all)


# Edge-Block mit Jahresfarbe und Colorbar

edge_traces = []
for u, v, data in G.edges(data=True):
    x0, y0 = pos[u]
    x1, y1 = pos[v]

    # Durchschnittsjahr für Farbskala
    avg_year = sum(data['years']) / len(data['years'])
    norm = (avg_year - min_year) / (max_year - min_year)
    
    # Farbe über Viridis interpolieren
    from matplotlib import cm
    cmap = cm.get_cmap('viridis')
    rgb = cmap(norm)  # (r,g,b,a)
    color = f'rgb({int(rgb[0]*255)},{int(rgb[1]*255)},{int(rgb[2]*255)})'

    # Linie selbst
    edge_traces.append(go.Scatter(
        x=[x0, x1],
        y=[y0, y1],
        mode="lines",
        line=dict(width=2, color=color),
        hoverinfo="none",
        showlegend=False
    ))

    # Hoverpunkte entlang der Linie (10%–90%)
    num_points = 50
    t = np.linspace(0.2, 0.8, num_points)
    x_vals = x0 + (x1 - x0) * t
    y_vals = y0 + (y1 - y0) * t
    info_text = "<br>".join(data['info'])

    edge_traces.append(go.Scatter(
        x=x_vals,
        y=y_vals,
        mode="markers",
        marker=dict(size=1, opacity=0),
        hoverinfo="text",
        text=[info_text]*num_points,
        hoverlabel=dict(
            bgcolor=color,
            font=dict(color="white")
        ),
        showlegend=False
    ))

# --- Colorbar als Legende hinzufügen ---
# Dummy-Trace, nur für die Colorbar
colorbar_trace = go.Scatter(
    x=[None], y=[None],
    mode='markers',
    marker=dict(
        colorscale='viridis',
        showscale=True,
        cmin=min_year,
        cmax=max_year,
        colorbar=dict(title='Jahr')
    ),
    hoverinfo='none',
    showlegend=False
)
edge_traces.append(colorbar_trace)


# Knoten vorbereiten
# normale Punkte
normal_nodes = [n for n in G.nodes() if n not in champs]
normal_x = [pos[n][0] for n in normal_nodes]
normal_y = [pos[n][1] for n in normal_nodes]

# Nachbarpunkte
normal_labels = [n for n in normal_nodes]
# Hovertext für normale Knoten
# Normale Knoten
normal_hover = []
for n in normal_nodes:
    hover_lines = []
    total_battles = 0
    
    for g in G.neighbors(n):
        edge_data = G[n][g]
        for battle_info in edge_data['info']:   # contender_info = Gegner + Jahr/Ort/Location; battle_info enthält z.B. "n1 vs n2, 2018 @ Berlin, LocationX" > wir schneiden den eigenen Namen raus
            parts = battle_info.split(", ", 1)[1]  # alles nach „n1 vs n2“
            contender_info = f"{g}, {parts}"
            hover_lines.append(contender_info)
        total_battles += len(edge_data['info'])

    hover_text = f"<b>{n}</b><i> Σ {total_battles}</i><br>{'<br>'.join(hover_lines)}<br>"
    normal_hover.append(hover_text)


trace_normal = go.Scatter(
    x=normal_x, y=normal_y,
    mode="markers+text",
    text=normal_labels,
    textposition="top center",
    hovertext=normal_hover,
    hoverinfo="text",
    textfont=dict(
        size=8,      # 👈 Text kleiner machen (Standard ist ca. 12–14)
        color="black" # optional Textfarbe
    ),
    marker=dict(
        size=8,
        color="grey",
        symbol="circle-open",
        opacity=1,
        line=dict(width=1, color="black")  # Randfarbe + Stärke
    )
)


# hervorgehobene Sterne
star_nodes = champs
star_x = [pos[n][0] for n in star_nodes]
star_y = [pos[n][1] for n in star_nodes]

star_labels = [n for n in star_nodes]
star_hover = []
for n in star_nodes:
    hover_lines = []
    total_battles = 0
    
    for g in G.neighbors(n):
        edge_data = G[n][g]
        for battle_info in edge_data['info']:
            parts = battle_info.split(", ", 1)[1]
            contender_info = f"{g}, {parts}"
            hover_lines.append(contender_info)
        total_battles += len(edge_data['info'])

    hover_text = f"<b>{n}</b><br>{'<br>'.join(hover_lines)}<br><i>Gesamt-Battles: {total_battles}</i>"
    star_hover.append(hover_text)


trace_stars = go.Scatter(
    x=star_x, y=star_y,
    mode="markers+text",
    text=star_labels,
    textposition="top center",
    hovertext=star_hover,
    hoverinfo="text",
    textfont=dict(
        size=8,     
        color="black"
    ),
    marker=dict(
        size=10,       # größer als normale Punkte
        color="gold",
        symbol="star",
        opacity=0.8,
        line=dict(width=1, color="black")  # Randfarbe + Stärke
    )
)


# Plot erstellen
fig = go.Figure(data=edge_traces,
                layout=go.Layout(
                    title="Battlerap in Deutschland - wer gegen wen",
                    title_x=0.5,
                    showlegend=False,
                    hovermode="closest",
                    margin=dict(b=20,l=5,r=5,t=40),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
                ))

fig.add_traces([trace_normal, trace_stars])

fig.show()


The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.



In [None]:
# Liste der rausgeschmissenen nur-2-Battles
df_one_battle = df_battles[~mask]
df_one_battle

In [None]:
# 1. Zeilen finden, die Klammern enthalten
mask_parens = df_battles_clean["Name #1"].str.contains(r"\(.*\)", na=False) | df_battles_clean["Name #2"].str.contains(r"\(.*\)", na=False)

# 2. Diese Zeilen in separatem DataFrame speichern
df_with_parens = df_battles_clean[mask_parens]


df_with_parens

In [None]:
# Dubletten finden

df_battles_clean["pair_sorted"] = df.apply(
    lambda row: tuple(sorted([row["norm_name #1"], row["norm_name #2"]])),
    axis=1
)

# Schritt 2: alle Paare zählen
pair_counts = df_battles_clean["pair_sorted"].value_counts()

# Schritt 3: nur Paare mit mehr als 1 Auftreten auswählen
duplicate_pairs = pair_counts[pair_counts > 1].index.tolist()

# Schritt 4: Zeilen ausgeben, die zu den Duplikaten gehören
df_duplicates = df_battles_clean[df_battles_clean["pair_sorted"].isin(duplicate_pairs)]

df_duplicates

In [None]:
# alle Namen, alphabetisch, zum Dubletten filtern

all_names = pd.concat([df_battles["norm_name #1"], df_battles["norm_name #2"]])
unique_names = sorted(all_names.unique())
unique_names