# Processing and analysis of data for meteor detection
## GRAPHS - Setting up the working environment for data analytics

This command installs the Python packages that the notebook relies on for data manipulation and visualization (for example pandas and matplotlib). Run it once before executing the cells below.

In [None]:
pip install matplotlib pandas

1. Graph - BOX PLOT

Loads data from a CSV file into a Pandas DataFrame, defines the order of categories, creates and displays a box plot to show the distribution of echo lengths for different types, adding mean values to the plot.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Load the echo records; this cell uses the 'label' and 'length_in_sec' columns.
df = pd.read_csv('testing5_1.csv')

# Fixed left-to-right order of the echo categories on the x-axis.
desired_order = ['dlhá bez headecha', 'dlhá s headechom', 'krátka bez headecha', 'krátka s headechom', ]

plt.figure(figsize=(10, 8))
sns.boxplot(x='label', y='length_in_sec', data=df, order=desired_order)
plt.xlabel('Typ ozveny')
plt.ylabel('Dĺžka ozveny (s)')
plt.title('Rozloženie dĺžok ozvien pre jednotlivé typy')
plt.xticks(rotation=45)

# Y-axis spans 0-8 with a tick every 0.25 (the last tick lands on 7.25).
plt.ylim(0, 8)
plt.yticks(np.arange(0, 7.28, 0.25))
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Per-category means, re-indexed to the plotting order so that position i
# always refers to the i-th box. A plain groupby returns the labels in sorted
# order, which would misplace the markers as soon as the data contains an
# extra label or lacks one of the expected ones.
means = df.groupby('label')['length_in_sec'].mean().reindex(desired_order).values
for i, mean in enumerate(means):
    if np.isnan(mean):
        # No data for this category: there is no box to annotate.
        continue
    plt.text(i, mean + 0.1, f'Priemer: {mean:.2f}', ha='center', va='bottom', fontsize=8, color='blue')
    # Short horizontal segment marking the mean inside the box.
    plt.plot([i - 0.2, i + 0.2], [mean, mean], color='blue', linestyle='-', linewidth=2)

plt.show()

2. Graph - BAR CHART

Reads data from a CSV file into a Pandas DataFrame, converts the date strings to datetime values, counts the events per month and echo type, and displays the counts as a stacked bar chart. This chart is important for understanding how the events are distributed over the months.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the echo records; this cell uses the 'date' and 'label' columns.
df = pd.read_csv('testing5_1.csv')

# Dates are stored as day.month.year strings.
df['date'] = pd.to_datetime(df['date'], format='%d.%m.%Y')

# Number of events per (month name, label), one column per label.
# month_name() returns English names by default, so the keys match
# months_order regardless of the process locale (strftime('%B') does not
# give that guarantee). The year is not part of the key, so equal months of
# different years are counted together.
events_by_month_grouped = df.groupby([df['date'].dt.month_name(), 'label']).size().unstack(fill_value=0)

# Months shown on the x-axis, in calendar order.
months_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August']

# Create the figure explicitly and hand its axes to pandas; otherwise
# DataFrame.plot opens a figure of its own, leaving an empty extra figure
# behind and ignoring the requested size.
fig, ax = plt.subplots(figsize=(12, 8))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
# reindex (rather than .loc) keeps a listed month without any events as a
# zero-height bar instead of raising KeyError.
events_by_month_grouped.reindex(months_order, fill_value=0).plot(kind='bar', stacked=True, color=colors, ax=ax)
ax.set_title('Rozloženie udalostí podľa mesiaca a skupiny')
ax.set_xlabel('Mesiace')
ax.set_ylabel('Počet udalostí')
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.tick_params(axis='x', labelrotation=45)
ax.legend(title='Skupina')
fig.tight_layout()
plt.show()

3. Graph - PIE CHARTS

Reads data from a CSV file into a Pandas DataFrame, creates pie charts to show the distribution of echo counts over hours for different types of echoes, and prints the echo counts for each hour. These charts are essential for visualizing the hourly distribution of echo counts and identifying patterns.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd


# Load the echo records and derive the hour of day from the
# 'picture_ends_in' timestamp column.
df = pd.read_csv('testing5_1.csv')
df['hour'] = pd.to_datetime(df['picture_ends_in']).dt.hour

# One pie chart per echo type, laid out on a 2x2 grid.
labels = ['dlhá bez headecha', 'dlhá s headechom', 'krátka bez headecha', 'krátka s headechom']
fig, axs = plt.subplots(2, 2, figsize=(24, 20))
axs = axs.flatten()

for ax, label in zip(axs, labels):
    # Number of echoes of this type per hour, ordered by hour.
    counts = df.loc[df['label'] == label, 'hour'].value_counts().sort_index()

    ax.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)
    ax.set(title=label, xlabel='Hodina', ylabel='')

    # Text summary of the same per-hour counts.
    print(f'Výsledky pre skupinu ozvien: {label}')
    for hour, count in counts.items():
        print(f'Hodina {hour}:00 - {hour}:59 - Počet ozvien: {count}')

# Figure-level title placed slightly above the top row of charts.
fig.suptitle('Distribúcia počtu ozvien v priebehu hodín pre jednotlivé typy ozvien', y=1.05)
fig.tight_layout()
plt.show()