## <B>Zone-based clustering</B>

In [None]:
import pandas as pd
from sklearn.cluster import DBSCAN

# Cluster the box-region coordinates with DBSCAN and save the labelled rows.

# Input file: no header row, fields separated by a comma followed by a space.
file_path = '영화5 레버넌트 박스 영역 좌표.txt'

# A multi-character separator is only supported by pandas' python parser
# engine. Naming the engine explicitly avoids the ParserWarning that is
# emitted when pandas has to fall back from the default C engine.
data = pd.read_csv(file_path, sep=', ', header=None, engine='python')

# Name the six columns of the input file.
data.columns = ['frame', 'x1', 'y1', 'x2', 'y2', 'confidence']

# Only the (x1, y1) corner of each box is used as the clustering feature.
coords = data[['x1', 'y1']].to_numpy()

# eps and min_samples are tuning knobs; adjust them for the data at hand.
dbscan = DBSCAN(eps=0.005, min_samples=30)
data['cluster'] = dbscan.fit_predict(coords)

# Save every original column plus the cluster label as a tab-delimited file
# with a header row.
output_file_path = '영화5 클러스터링.txt'
data.to_csv(output_file_path, sep='\t', index=False, header=True)

# Show the labelled table.
print(data)

## <B>Renumbering clusters by consecutive run</B>

In [None]:
import pandas as pd

# Load the tab-delimited clustering result.
file_path = '영화5 클러스터링.txt'
data = pd.read_csv(file_path, sep='\t')

# Give every run of consecutive rows that share a cluster label its own
# sequential number, starting at 1. A row opens a new run when its label
# differs from the row directly above it (the first row always does), so the
# running total of those "run start" flags is the new number.
labels = data['cluster']
starts_new_run = labels.ne(labels.shift())
data['new_cluster'] = starts_new_run.cumsum()

# Write the table, now including the new_cluster column, tab-delimited.
output_file_path = '영화5 새로운 클러스터링.txt'
data.to_csv(output_file_path, sep='\t', index=False)

# Show the renumbered table.
print(data)

## <B>Ignore runs of 45 or fewer consecutive rows (cluster filtering)</B>

In [None]:
import pandas as pd

# Drop clusters that never appear in a long enough run of consecutive rows.

# Load the renumbered, tab-delimited clustering result.
file_path = '영화5 새로운 클러스터링.txt'
data = pd.read_csv(file_path, sep='\t')

# A run must be strictly longer than this many rows to be kept. The same
# threshold is applied to every run, including the final one in the file
# (previously the final run was compared against 3 instead of 45).
min_run_length = 45

# Identify each run of consecutive rows with the same new_cluster value and
# look up how many rows that run contains.
labels = data['new_cluster']
run_id = labels.ne(labels.shift()).cumsum()
run_length = run_id.map(run_id.value_counts())

# A cluster value is kept if at least one of its runs is long enough.
filtered_data = set(labels[run_length > min_run_length])

# Keep only the rows whose cluster value survived the filter.
new_filtered_df = data[labels.isin(filtered_data)]

# Save the result (tab-delimited, with a header row).
output_file_path = '영화5 클러스터 필터링.csv'
new_filtered_df.to_csv(output_file_path, sep='\t', index=False)

# Show the filtered table.
print(new_filtered_df)

## <B>Time filtering</B>

In [None]:
import pandas as pd

# Add a time column derived from the frame number and save the result.

# The input is tab-delimited and starts with a header row, so read it that
# way. Reading it with header=None would turn the header into a data row
# whose frame value becomes NaN, and would lose the column names.
file_path = '영화5 클러스터 필터링.csv'
data = pd.read_csv(file_path, sep='\t')

# Show the parsed column types as a quick sanity check.
print(data.dtypes)


def frame_to_time(frame, fps=30):
    """Convert a frame number to a time value by dividing by ``fps``.

    Args:
        frame: A frame number, or a pandas Series of frame numbers.
        fps: Divisor applied to the frame number; defaults to 30.
            NOTE(review): presumably the video frame rate, giving seconds;
            confirm against the source video.

    Returns:
        ``frame / fps``.
    """
    return frame / fps


# The first column is assumed to hold the frame number. Coerce it to a
# numeric type so that any unparsable entry becomes NaN instead of raising.
frame_column = data.columns[0]
data[frame_column] = pd.to_numeric(data[frame_column], errors='coerce')
data['time'] = frame_to_time(data[frame_column])

# Save tab-delimited and keep the header row so every column stays named.
output_file_path = '영화5 클러스터 시간필터링.csv'
data.to_csv(output_file_path, sep='\t', index=False)

print(f"The converted data was saved to the file {output_file_path} file.")

## <B>Calibrate Box Region Y Coordinate</B>

In [None]:
import pandas as pd

# Load the tab-delimited file; its first line is used as the header.
df = pd.read_csv('영화5 클러스터 시간필터링.csv', sep='\t')

# Quick inspection: first five rows, then the column names.
print(df.head())
print(df.columns)

# Scale both y coordinates of each box by the same calibration factor.
y_scale = 0.9261501210653754
for column in ('y1', 'y2'):
    df[column] = df[column] * y_scale

# Write the calibrated table as a comma-separated file without the index.
df.to_csv('영화5 보정.csv', index=False)