In [None]:
import pandas as pd
from zipfile import ZipFile
from random import randint


def load_zipped_data(zip_path: str) -> pd.DataFrame:
    """
    Reads the first file stored inside a zip archive as CSV data.

    Parameters
    ----------
        zip_path : str
            Path to the zip archive to read.

    Returns
    -------
        pandas.DataFrame:
            The contents of the first file in the archive, parsed with
            `pandas.read_csv` using its default options.

    Raises
    ------
        IndexError
            If the archive contains no files.
    """
    with ZipFile(zip_path, 'r') as archive:
        # Archives created from a folder list the folder itself (e.g. "data/")
        # before its files; opening such an entry yields an empty stream that
        # read_csv cannot parse, so directory records are skipped.
        members = [info for info in archive.infolist() if not info.is_dir()]
        if not members:
            # Same exception type as indexing an empty name list, but with a
            # message that says what actually went wrong.
            raise IndexError(f"no files found in zip archive: {zip_path!r}")
        with archive.open(members[0]) as member_file:
            return pd.read_csv(member_file)

def sample_and_save_csv(
    dataframe: pd.DataFrame,
    n_samples: int,
    output_file: str,
    random_state=None,
) -> pd.DataFrame:
    """
    Randomly selects `n_samples` rows from a DataFrame and saves them to a CSV file.

    Parameters
    ----------
        dataframe : pandas.DataFrame
            The DataFrame containing the data.
        n_samples : int
            The number of rows to randomly select. Sampling is done without
            replacement, so pandas raises ValueError when this exceeds the
            number of rows in `dataframe`.
        output_file : str
            The name of the CSV file where the selected rows will be saved.
        random_state : optional
            Seed forwarded to `DataFrame.sample`. Pass a fixed value to get a
            reproducible sample. When omitted (None), a seed is drawn with
            `randint(0, 1000)` on every call.

    Returns
    -------
        pandas.DataFrame:
            A DataFrame containing the randomly selected rows, keeping their
            original index labels.
    """
    if random_state is None:
        # Default behaviour: a fresh seed per call, so each call produces a
        # different sample unless the caller pins `random_state`.
        random_state = randint(0, 1000)

    # Randomly select `n_samples` rows
    sampled_df = dataframe.sample(n=n_samples, random_state=random_state)

    # Save selected rows to csv; index=False leaves the row labels out of the file
    sampled_df.to_csv(output_file, index=False, encoding='utf-8')

    return sampled_df

# Load the original dataset from its zip archive, draw a random sample of
# 4500 rows and save that sample to a CSV file. The sampled DataFrame is the
# cell's last expression, so the notebook displays it.
df = load_zipped_data(zip_path='../data/dataset.zip')
sample_and_save_csv(
    dataframe=df,
    n_samples=4500,
    output_file='../data/uploads/test_naming.csv',
)

Unnamed: 0,date,cloud_cover,sunshine,global_radiation,max_temp,mean_temp,min_temp,precipitation,pressure,snow_depth
897,19810616,4.0,7.2,240.0,14.0,12.8,8.6,0.0,102190.0,0.0
11000,20090212,5.0,4.0,66.0,5.1,2.0,-1.1,2.2,102480.0,0.0
10194,20061129,1.0,4.6,47.0,14.0,9.8,5.6,0.2,102720.0,0.0
15287,20201108,7.0,4.8,71.0,14.1,11.3,8.6,2.0,101810.0,
627,19800919,7.0,5.3,129.0,19.7,18.6,15.7,0.6,100750.0,0.0
...,...,...,...,...,...,...,...,...,...,...
3050,19870509,3.0,13.9,308.0,14.9,15.2,6.6,0.0,101580.0,0.0
10659,20080308,6.0,0.2,47.0,10.9,7.0,3.2,3.6,100470.0,0.0
6439,19960818,1.0,13.0,272.0,31.4,23.0,15.4,0.0,101900.0,0.0
6553,19961210,8.0,0.0,12.0,5.2,3.8,2.8,0.0,101850.0,0.0
