In [None]:
import os
import subprocess
import pandas as pd

# Root directory that is walked recursively when collecting video files.
folder_path = "F:\\"
# folder_path = "C:\\Users\\Karol\\Videos\\" # TV series
# Absolute path of the ffprobe executable invoked for every analysed file.
ffprobe_path = "C:\\Users\\Karol\\Downloads\\Free_MP4_to_MP3_Converter_64bit_PORTABLE\\tools\\FFmpeg64\\ffprobe.exe"

In [None]:
def _run_ffprobe(args, file_path):
    """Run ffprobe on ``file_path`` with the option list ``args`` and return its stripped stdout.

    Raises ``subprocess.CalledProcessError`` when ffprobe exits with a non-zero
    status and ``FileNotFoundError`` when the executable at ``ffprobe_path``
    cannot be started.
    """
    cmd = [ffprobe_path, "-v", "error", *args, file_path]
    completed = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        check=True,
        # Only ASCII fields (codec names, numbers) are consumed from the output;
        # errors="replace" keeps an undecodable byte in ffprobe's diagnostics
        # (e.g. a non-ASCII file name) from raising during decoding.
        encoding="utf-8",
        errors="replace",
    )
    return completed.stdout.strip()


def get_video_info(file_path):
    """Collect codec, resolution, size and duration metadata for one video file.

    Args:
        file_path: Path of the media file to analyse.

    Returns:
        ``None`` when the file does not exist, when the video-stream probe
        fails (ffprobe error or ffprobe executable missing) or when the size
        probe fails. Otherwise a dict with the keys ``file_name``,
        ``extension``, ``size`` (string in GB with two decimals), ``duration``
        (ffprobe's sexagesimal value without the fractional part),
        ``video_codec``, ``audio_codec`` and ``resolution`` (``"WxH"``).
        Fields that ffprobe could not provide are ``None``.
    """
    # Check that the file exists before spawning any process.
    if not os.path.isfile(file_path):
        print(f"Plik nie istnieje: {file_path}")
        return None

    # Debugging aid: show the full path of the file being analysed.
    print(f"Analizowanie pliku: {file_path}")

    # --- First video stream: codec name, width, height ----------------------
    try:
        video_out = _run_ffprobe(
            ["-select_streams", "v:0", "-show_entries",
             "stream=codec_name,width,height", "-of", "csv=p=0"],
            file_path,
        )
    except subprocess.CalledProcessError as e:
        print(f"Błąd przy analizowaniu pliku {file_path}: {e}")
        return None
    except FileNotFoundError as e:
        print(f"Błąd - plik nie znaleziony: {file_path}")
        print(e)
        return None

    # Only the first csv line describes the stream; any further lines would
    # otherwise leak into the height field and break the int() conversion.
    video_lines = video_out.splitlines()
    video_fields = [field.strip() for field in video_lines[0].split(",")] if video_lines else [""]
    # An empty codec field (no video stream) is reported as None, not ''.
    video_codec = video_fields[0] or None

    resolution = None
    if len(video_fields) > 2:
        try:
            resolution = f"{int(video_fields[1])}x{int(video_fields[2])}"
        except ValueError:
            # Non-numeric width/height (e.g. "N/A"): leave the resolution unknown
            # instead of aborting the caller's whole scan.
            resolution = None

    # --- First audio stream: codec name (best effort) ------------------------
    try:
        audio_info = _run_ffprobe(
            ["-select_streams", "a:0", "-show_entries",
             "stream=codec_name", "-of", "csv=p=0"],
            file_path,
        )
    except subprocess.CalledProcessError as e:
        print(f"Błąd przy analizowaniu audio pliku {file_path}: {e}")
        audio_info = None

    # --- Container size -------------------------------------------------------
    try:
        size_out = _run_ffprobe(
            ["-show_entries", "format=size",
             "-of", "default=noprint_wrappers=1:nokey=1"],
            file_path,
        )
    except subprocess.CalledProcessError as e:
        print(f"Błąd przy analizowaniu rozmiaru pliku {file_path}: {e}")
        return None

    try:
        size_in_bytes = float(size_out.split(",")[0])
    except ValueError:
        # ffprobe printed nothing usable (empty or "N/A"); the filesystem still
        # knows the size, so use it rather than raising.
        size_in_bytes = float(os.path.getsize(file_path))
    human_readable_size = f"{size_in_bytes / (1024 * 1024 * 1024):.2f} GB"  # in GB

    # --- Duration in H:MM:SS form (best effort) ------------------------------
    try:
        duration_str = _run_ffprobe(
            ["-show_entries", "format=duration", "-sexagesimal",
             "-of", "default=noprint_wrappers=1:nokey=1"],
            file_path,
        )
        if duration_str:
            # Drop the fractional seconds.
            duration_str = duration_str.split('.')[0]
    except subprocess.CalledProcessError as e:
        print(f"Błąd przy pobieraniu czasu trwania z pliku {file_path}: {e}")
        duration_str = None

    return {
        "file_name": os.path.basename(file_path),
        "extension": os.path.splitext(file_path)[1],
        "size": human_readable_size,
        # Empty output is normalised to None, matching the other optional fields.
        "duration": duration_str or None,
        "video_codec": video_codec,
        "audio_codec": audio_info if audio_info else None,
        "resolution": resolution,
    }

In [None]:
# Walk folder_path recursively and keep the metadata of every file whose
# name ends with one of the recognised video extensions.
VIDEO_EXTENSIONS = (".mp4", ".mkv", ".avi", ".mov", ".wmv", ".flv")
data = []

for dirpath, _subdirs, filenames in os.walk(folder_path):
    for filename in filenames:
        candidate = os.path.join(dirpath, filename)
        if not filename.lower().endswith(VIDEO_EXTENSIONS):
            continue
        info = get_video_info(candidate)
        # get_video_info returns None for files it could not analyse.
        if info:
            data.append(info)

In [12]:
# One row per analysed file; the bare name on the last line renders the frame.
df = pd.DataFrame(data=data)
df

Unnamed: 0,file_name,extension,size,duration,video_codec,audio_codec,resolution
0,2024-09-07 21-40-51.mkv,.mkv,0.01 GB,0:00:10,h264,aac,1920x1080
1,2024-09-07 22-23-46.mkv,.mkv,0.01 GB,0:00:05,h264,aac,1920x1080
2,2024-09-08 11-23-36.mkv,.mkv,0.03 GB,0:00:12,h264,aac,1920x1080
3,2024-09-08 11-51-29.mkv,.mkv,0.04 GB,0:00:14,h264,aac,1920x1080
4,2025-02-14 03-58-17.mkv,.mkv,0.05 GB,0:09:15,h264,aac,1920x1080
...,...,...,...,...,...,...,...
604,Voltron Vlogs Short 3 of 7 - Allura.mp4,.mp4,0.01 GB,0:02:28,h264,aac,1280x720
605,Voltron Vlogs Short 4 of 7 - Lance.mp4,.mp4,0.01 GB,0:03:23,h264,aac,1280x720
606,Voltron Vlogs Short 5 of 7 - Pidge.mp4,.mp4,0.01 GB,0:02:58,h264,aac,1280x720
607,Voltron Vlogs Short 6 of 7 - Hunk.mp4,.mp4,0.02 GB,0:03:21,h264,aac,1280x720


In [13]:
# Number of files per video codec (value_counts orders by descending count).
print("Unikalne wartości dla kodeków wideo:")
df_video_codec = (
    df['video_codec']
    .value_counts()
    .rename_axis('video_codec')
    .reset_index(name='count')
)
df_video_codec

Unikalne wartości dla kodeków wideo:


Unnamed: 0,video_codec,count
0,h264,366
1,hevc,243


In [14]:
# Number of files per audio codec (value_counts orders by descending count).
print("\nUnikalne wartości dla kodeków audio:")
df_audio_codec = (
    df['audio_codec']
    .value_counts()
    .rename_axis('audio_codec')
    .reset_index(name='count')
)
df_audio_codec


Unikalne wartości dla kodeków audio:


Unnamed: 0,audio_codec,count
0,aac,234
1,eac3,155
2,ac3,155
3,dts,64
4,vorbis,1


In [15]:
# Number of files per file extension (value_counts orders by descending count).
print("\nUnikalne rozszerzenia plików:")
df_extension = (
    df['extension']
    .value_counts()
    .rename_axis('extension')
    .reset_index(name='count')
)
df_extension


Unikalne rozszerzenia plików:


Unnamed: 0,extension,count
0,.mkv,305
1,.mp4,304


In [16]:
# Number of files per resolution string (value_counts orders by descending count).
print("\nUnikalne rozdzielczości:")
df_resolution = (
    df['resolution']
    .value_counts()
    .rename_axis('resolution')
    .reset_index(name='count')
)
df_resolution


Unikalne rozdzielczości:


Unnamed: 0,resolution,count
0,1920x1080,334
1,1920x816,131
2,1440x1080,61
3,1280x720,56
4,852x480,10
5,854x480,5
6,640x480,3
7,1920x802,2
8,1918x802,2
9,720x404,1


In [None]:
# Combined summary of all four frequency tables in a single cell.
# Jupyter renders only the LAST bare expression of a cell, so every table
# except the final one has to be shown explicitly with display(); as bare
# names in the middle of the cell they produced no output at all.
# `display` is provided by the IPython kernel without an import.
# NOTE: df_video_codec is computed in an earlier cell and must exist already.
display(df_video_codec)

print("\nUnikalne wartości dla kodeków audio:")
df_audio_codec = df['audio_codec'].value_counts().reset_index()
df_audio_codec.columns = ['audio_codec', 'count']
display(df_audio_codec)

print("\nUnikalne rozszerzenia plików:")
df_extension = df['extension'].value_counts().reset_index()
df_extension.columns = ['extension', 'count']
display(df_extension)

print("\nUnikalne rozdzielczości:")
df_resolution = df['resolution'].value_counts().reset_index()
df_resolution.columns = ['resolution', 'count']
# Last expression of the cell: rendered as the cell's output.
df_resolution

In [None]:
# Persist the full table, in scan order, without the positional index column.
df.to_csv(path_or_buf="film_info.csv", index=False)

In [None]:
# Order rows by video codec, then audio codec (both ascending).
sort_keys = ['video_codec', 'audio_codec']
df_sorted = df.sort_values(by=sort_keys, ascending=True)

# Write the sorted table to a CSV file, without the index column.
df_sorted.to_csv(path_or_buf="film_info_sorted.csv", index=False)