In [1]:
import os
import glob

# Pack locations to analyze.
#
# `songs_path` and `pack_names` describe the packs installed in the author's
# own setup; edit them to match your machine before running.
songs_path = 'C:\\Users\\Lenovo\\AppData\\Roaming\\ITGmania\\Songs'
pack_names = [
    'Stamina RPG 6',
    'SRPG6 NNP',
    'Stamina RPG 7',
    'Stamina RPG 7 - SN',
    'Stamina RPG 7 - FE',
    'Stamina RPG 8',
    'Stamina RPG 8 - SN',
    'Stamina RPG 8 - FE',
    'Stamina RPG 9',
    'Stamina RPG 9 - SN',
    'Stamina RPG 9 - FE',
]
pack_paths = [os.path.join(songs_path, name) for name in pack_names]

# Extra packs placed in ./packs relative to the working directory.
# The pattern is built with os.path.join so it uses the platform separator
# (it is still '.\\packs\\*' on Windows); a hard-coded backslash pattern
# matches nothing on POSIX systems.
glob_path = os.path.join('.', 'packs', '*')
# glob does not guarantee any ordering, and the processing order decides which
# of two identical charts is kept by the hash de-duplication later on, so sort
# for reproducible runs. Plain files (READMEs, archives, ...) are skipped
# because only directories can be handed on as pack paths.
pack_paths += sorted(path for path in glob.glob(glob_path) if os.path.isdir(path))

In [2]:
from simfile.dir import SimfilePack
from analysis.analyzer import SongAnalyzer
from analysis.hash import get_hash
import pandas as pd

# Load the dataset written by a previous run (if there is one) so that charts
# whose hash is already recorded are not analyzed again.
try:
    existing_df = pd.read_csv('dataset.csv')
    seen_hashes = set(existing_df['Hash'])
except FileNotFoundError:
    # No earlier dataset: start from scratch.
    existing_df = None
    seen_hashes = set()

# One tuple per newly analyzed chart:
# (name, hash, meter, scaled BPM, scaled total stream, stream density).
data = []
for pack_path in pack_paths:
    pack = SimfilePack(pack_path)
    for song in pack.simfiles(strict=False):
        song_analyzer = SongAnalyzer(song)
        song_str = f'{pack.name}/{song.title or ""}'

        for chart_analyzer in song_analyzer.chart_analyzers.values():
            chart = chart_analyzer.chart
            # Only dance-single charts are part of the dataset.
            if (chart.stepstype or '').lower() != 'dance-single':
                continue

            # Disambiguate non-challenge charts of the same song by suffixing
            # the difficulty name.
            diff = (chart.difficulty or '').lower()
            title = song_str
            if diff != 'challenge':
                title += f' ({diff})'

            # Skip charts already present in the saved dataset or seen
            # earlier in this run.
            chart_hash = get_hash(song, chart)
            if chart_hash in seen_hashes:
                print(f'skipping {title} (dupe hash)')
                continue
            seen_hashes.add(chart_hash)
            print(f'analyzing {title}')

            # Validate the meter before doing the stream analysis so no work
            # is wasted on charts that will be dropped. The other chart fields
            # above are treated as possibly None; int(None) raises TypeError
            # rather than ValueError, so both are handled here.
            try:
                meter = int(chart.meter)
            except (TypeError, ValueError):
                # Report the full chart title (pack/song + difficulty), like
                # every other message in this loop.
                print(title, ': invalid meter')
                continue

            stream_info = chart_analyzer.get_stream_info()
            total_stream = stream_info['total_stream']
            total_break = stream_info['total_break']
            bpms = stream_info['bpms']
            avg_bpm = stream_info['avg_bpm']

            if total_stream == 0:
                print(title, ': no stream')
            else:
                # Scale BPM and stream length by quant / 16.
                # NOTE(review): presumably this normalizes to 16th-note
                # equivalents - confirm against the analyzer.
                mult = stream_info['quant'] / 16
                data.append((
                    title,
                    chart_hash,
                    meter,
                    round(avg_bpm * mult, 3),
                    total_stream * mult,
                    # Stream density: share of stream in stream + break.
                    total_stream / (total_stream + total_break)
                ))

  __import__("pkg_resources").declare_namespace(__name__)  # type: ignore


analyzing Stamina RPG 6/[11] [128] Alcohol-Free
analyzing Stamina RPG 6/[11] [136] Cowboy
analyzing Stamina RPG 6/[11] [136] Flutterwonder (Hard)
analyzing Stamina RPG 6/[11] [124] Glory Days
analyzing Stamina RPG 6/[11] [145] Hawaiian Roller Coaster Ride (Medium)
analyzing Stamina RPG 6/[11] [130] Helix (Hard)
analyzing Stamina RPG 6/[11] [131] In The Game (The Football Song)
analyzing Stamina RPG 6/[11] [145] Kislotnyy Dozhd (Medium)
analyzing Stamina RPG 6/[11] [120] Le Perv
analyzing Stamina RPG 6/[11] [124] Lonely
analyzing Stamina RPG 6/[11] [145]  Move That Body!
analyzing Stamina RPG 6/[11] [147] This game (Hard)
analyzing Stamina RPG 6/[11] [128] We Are Giants (feat. Dia Frampton)
analyzing Stamina RPG 6/[12] [138] Cake By The Ocean
analyzing Stamina RPG 6/[12] [174] Gargantua
analyzing Stamina RPG 6/[12] [160] Gimme Your Desire (Hard)
analyzing Stamina RPG 6/[12] [140] Higanbana
analyzing Stamina RPG 6/[12] [156] I Want U
analyzing Stamina RPG 6/[12] [135] I'm Coming
analyzin

In [3]:
# Turn the newly analyzed charts into a table and merge it with the dataset
# loaded from the previous run (if any).
columns = ['Name', 'Hash', 'Meter', 'BPM', 'Total stream', 'Stream density']
new_df = pd.DataFrame(data, columns=columns)

# Only concatenate frames that actually contain rows. On a re-run where every
# chart was already known, `data` is empty and `new_df` has all-object columns;
# concatenating such an empty frame is deprecated in recent pandas and would
# eventually degrade the numeric dtypes of the existing dataset.
frames = [
    frame for frame in (existing_df, new_df)
    if frame is not None and not frame.empty
]
# With nothing to merge at all, fall back to the empty, correctly-labelled frame.
df = pd.concat(frames, axis=0, ignore_index=True) if frames else new_df
df

Unnamed: 0,Name,Hash,Meter,BPM,Total stream,Stream density
0,Stamina RPG 6/[11] [128] Alcohol-Free,0b020e29c3919c10c1d3fb2776e813b18af30114,11,128.000,52.0,0.626506
1,Stamina RPG 6/[11] [136] Cowboy,f2bc769bdf7addfc64d7e46643e194f184820a56,11,136.000,33.0,0.492537
2,Stamina RPG 6/[11] [136] Flutterwonder (Hard),fc05556c0b501fabafe79a16923e910518d5d03d,11,136.000,38.0,0.487179
3,Stamina RPG 6/[11] [124] Glory Days,c0b5988bae418d86ccac7c7209f605932857bb81,11,124.000,44.0,0.483516
4,Stamina RPG 6/[11] [145] Hawaiian Roller Coast...,d0b51e7437f6705f45bf6d9c1254964330bfcc4e,11,145.000,29.0,0.358025
...,...,...,...,...,...,...
1661,Stamina RPG 6/[26] [272] Holy Orders (Be Just ...,c1e309e4b0570bacf1b2f9ca17c29406c529fbae,26,272.076,222.0,0.860465
1662,Stamina RPG 6/[26] [282] Lay Your Hands On Me,7fcd47a24c5450fc48ece61c72cecac134f7d474,26,282.000,144.0,0.782609
1663,Stamina RPG 6/[26] [260] Stratospheric Intricacy,e410d5bf872d5f37ba4713724d6d6fd54d4ade67,26,226.474,3182.0,0.827568
1664,Stamina RPG 6/[27] [290] Monstrum Spectrum,4c710adac4297c364e412ea1f8171eeff301bc35,27,290.000,98.0,0.970297


In [4]:
# Persist the merged dataset to the same file the loading step reads back;
# the row index is left out so only the data columns are stored.
df.to_csv(path_or_buf='dataset.csv', index=False)