In [2]:
# Image handling
from PIL import Image  
from PIL import Image, ImageFilter
import cv2
import imghdr

# Data handling
import pandas as pd
import numpy as np

# Filesystem  
import os

# Text processing
import pytesseract  
import re

# Dates
from datetime import datetime

In [3]:
def slice_video_on_profile_change(video_path, output_folder, threshold=100):
    """Save one JPEG per detected "profile change" in a video.

    The first frame is always saved. Every later frame is converted to
    grayscale and compared with the most recently *saved* frame; when the
    mean absolute pixel difference exceeds ``threshold`` the frame is written
    to ``{output_folder}/segment_{n}.jpg`` (n starts at 1) and becomes the new
    reference frame.

    Args:
        video_path: Path of the video file passed to ``cv2.VideoCapture``.
        output_folder: Folder that receives the ``segment_*.jpg`` files.
            It is created if it does not exist yet.
        threshold: Mean absolute grayscale difference above which a frame is
            treated as a new profile. Grayscale frames are 8-bit, so the mean
            difference lies between 0 and 255.
    """
    # cv2.imwrite does not create missing folders, so make sure it exists.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # Open video
    cap = cv2.VideoCapture(video_path)

    # Last saved frame (grayscale) to compare new frames against
    prev_img = None

    # Counter for sliced segment numbers
    seg_num = 0

    try:
        # Read video frames until end
        while cap.isOpened():

            # Read next frame
            ret, frame = cap.read()

            # Break out of loop if video ended
            if not ret:
                break

            # Convert colored frame to grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Compare current frame vs last saved one.
            # cv2.absdiff is used instead of `gray - prev_img` because plain
            # subtraction of uint8 arrays wraps around (e.g. 3 - 5 == 254).
            if prev_img is None or np.mean(cv2.absdiff(gray, prev_img)) > threshold:

                # Current frame becomes the new reference profile
                prev_img = gray

                # Increment segment counter
                seg_num += 1

                # Write sliced segment frame to file
                cv2.imwrite(f"{output_folder}/segment_{seg_num}.jpg", frame)
    finally:
        # Release video resource even if a frame fails to process
        cap.release()

In [4]:
# Set of allowed image formats
IMAGE_FORMATS = {'png', 'jpg', 'jpeg'}

def ocr_image(image_path, kernel_size=1):
    """Run Tesseract OCR on an image file and return the recognized text.

    The file is accepted when its extension is in ``IMAGE_FORMATS``. If the
    extension is not recognized, the file content is sniffed with
    ``imghdr.what`` instead. The image is converted to grayscale and blurred
    before being passed to ``pytesseract.image_to_string``.

    Args:
        image_path: Path of the image to read.
        kernel_size: Radius handed to ``ImageFilter.GaussianBlur``.

    Returns:
        The OCR text, or ``None`` when the path is not an accepted image
        (unknown extension and either not a regular file or not a
        recognized image type).
    """
    # Validate image file
    ext = os.path.splitext(image_path)[1].lower()
    if ext[1:] not in IMAGE_FORMATS:
        # Directories and missing paths would make imghdr.what raise.
        if not os.path.isfile(image_path):
            return None
        # NOTE: imghdr is deprecated since Python 3.11 and removed in 3.13.
        if imghdr.what(image_path) not in IMAGE_FORMATS:
            return None

    # Load image and convert to grayscale; the context manager closes the
    # underlying file once the converted copy has been made.
    with Image.open(image_path) as image:
        gray = image.convert('L')

    # Apply gaussian blur to remove noise
    blur = gray.filter(ImageFilter.GaussianBlur(radius=kernel_size))

    # Perform OCR
    return pytesseract.image_to_string(blur)

In [5]:
def append_if_valid(df, row, valid_users, key='Username'):
    """Return ``df`` with ``row`` appended when the row's user is allowed.

    Args:
        df: DataFrame to extend. It is not modified in place.
        row: Mapping (dict or Series) holding the values of the new row.
        valid_users: Collection of accepted user names.
        key: Name of the entry in ``row`` that holds the user name.

    Returns:
        A new DataFrame with the row added at the end (index renumbered),
        or ``df`` itself when ``row[key]`` is not in ``valid_users``.
    """
    if row[key] not in valid_users:
        return df

    # DataFrame.append was removed in pandas 2.0; build a one-row frame and
    # concatenate instead.
    row_frame = pd.DataFrame([row])

    if len(df) == 0:
        # Skip concat for an empty frame (recent pandas warns about
        # concatenating empty entries); keep df's column order and add any
        # columns that only exist in the new row.
        columns = list(df.columns) + [c for c in row_frame.columns if c not in df.columns]
        return row_frame.reindex(columns=columns).reset_index(drop=True)

    return pd.concat([df, row_frame], ignore_index=True)

In [6]:
# Create the empty results table that the OCR loop further down fills in.
# Columns: in-game name (IGN), relic donation points, and the image's date.
df = pd.DataFrame(columns=['IGN', 'Relic Donation Points', 'Date'])

In [7]:
# Roster of accepted in-game names, one per line
usernames = """
Crimson
SAIKA SAN
Acebites
BigBoyChaddy
ChocoChipDinie
darkvioletfox
Gingerdoodles
Glutenburgh
jjoool
jtrimble3
JuicyJJ1
Karou8
mangozic
MarinKitagawa
MelonBunii
Minty24
MochixIsland
Momomoses
Nickknack
Portallifesource
r2miss
RougeAether
SALTMAMA
Steveth
V1b1ingFlash
valertine
Windsweep
xChocobo
Zane0930
"""

# Split by newline
username_list = usernames.splitlines()

# Strip surrounding whitespace and drop blank lines. Without the strip, a
# stray trailing space in the roster would keep that name from ever matching.
valid_users = [name.strip() for name in username_list if name.strip()]

print(valid_users)

['Crimson', 'SAIKA SAN', 'Acebites', 'BigBoyChaddy', 'ChocoChipDinie', 'darkvioletfox', 'Gingerdoodles', 'Glutenburgh', 'jjoool', 'jtrimble3', 'JuicyJJ1', 'Karou8', 'mangozic', 'MarinKitagawa  ', 'MelonBunii', 'Minty24', 'MochixIsland', 'Momomoses', 'Nickknack', 'Portallifesource', 'r2miss', 'RougeAether', 'SALTMAMA', 'Steveth', 'V1b1ingFlash', 'valertine', 'Windsweep', 'xChocobo', 'Zane0930']


In [12]:
# Spot-check: run OCR on a single extracted frame to inspect the raw text layout
ocr_image('slice/segment_41.jpg')

"@ Everybody's Friend\nMelonBunii\n\nA jl Crystal 3\n148) WISTERIA\n\nActivity Points 358,003 4a\n\nBasic Activity Expansion Relic Donation\nPoints oints Points\n250,000 803 107,200\n\n~™ Prestige 123,470 tikes & Gifts\n"

In [None]:
# Example call
#slice_video_on_profile_change('video.mp4', 'slice')

# Folder path
folder_path = 'slice/'

# Captures the last number on the line above the "Prestige" line. The OCR text
# uses thousands separators and may put blank lines in between, e.g.
# "250,000 803 107,200\n\n~™ Prestige 123,470" -> "107,200".
_POINTS_PATTERN = re.compile(r'(\d[\d,]*)[ \t]*\n\s*.*Prestige')


def _parse_profile_text(text):
    """Return (username, points) parsed from a profile's OCR text, or None."""
    if not text:
        return None

    # Username is the second line of the OCR output
    lines = text.split("\n")
    if len(lines) < 2:
        return None
    username = lines[1].strip()

    # Points is the last number before "Prestige"
    match = _POINTS_PATTERN.search(text)
    if not username or match is None:
        return None

    return username, int(match.group(1).replace(",", ""))


# Tolerate stray whitespace in the roster when matching OCR'd names
_allowed_names = {name.strip() for name in valid_users}

# Collect rows first and build the frame once instead of growing it per image
records = []

# Loop through all images in folder
for filename in sorted(os.listdir(folder_path)):
    image_path = os.path.join(folder_path, filename)

    try:
        # Perform OCR on image
        text = ocr_image(image_path)

        # Get image upload date (file modification time)
        upload_time = os.path.getmtime(image_path)
    except Exception as exc:
        # Best effort: report the unreadable file and carry on with the rest
        print(f"Skipping {filename}: {exc}")
        continue

    parsed = _parse_profile_text(text)
    if parsed is None:
        continue
    username, points = parsed

    # Keep the row only if the username is valid
    if username not in _allowed_names:
        continue

    # Create new row
    records.append({
        'IGN': username,
        'Relic Donation Points': points,
        'Date': datetime.fromtimestamp(upload_time).date()
    })

if records:
    new_rows = pd.DataFrame(records, columns=df.columns)
    df = new_rows if len(df) == 0 else pd.concat([df, new_rows], ignore_index=True)

    # Drop duplicates once, after all images were processed
    df = df.drop_duplicates().reset_index(drop=True)