In [None]:
from pathlib import Path
import pandas as pd

# Pracujemy z plikami - CSV / TSV

## Czym jest CSV?

In [None]:
# Create a minimal comma-separated file: one header row followed by two records.
# The backslash right after the opening quotes stops the file from starting
# with an empty line, and the encoding is pinned so the bytes written do not
# depend on the platform default.
with open("simple.csv", "w", encoding="utf-8") as f:
    f.write("""\
imie,nazwisko,wiek
Jan,Kowalski,25
Anna,Nowak,37
""")

In [None]:
# Load simple.csv with pandas' default settings (comma separator, header row).
# The bare `df` on the last line makes the notebook display the resulting frame.
df = pd.read_csv('simple.csv')
df

In [None]:
# Create a CSV file that contains only data rows, with no header line.
# The backslash right after the opening quotes stops the file from starting
# with an empty line, and the encoding is pinned so the bytes written do not
# depend on the platform default.
with open("no_header.csv", "w", encoding="utf-8") as f:
    f.write("""\
Jan,Kowalski,25
Anna,Nowak,37
""")

In [None]:
# no_header.csv has no header line, so header=None tells pandas not to use
# any row as column names; the columns are expected to get default labels.
df = pd.read_csv('no_header.csv', header=None)
df

In [None]:
# Same header-less file, but this time the column labels are supplied
# explicitly through `names`.
df = pd.read_csv('no_header.csv', header=None, names=['name', 'surname', 'age'])
df

In [None]:
# Create a file whose fields are separated by semicolons instead of commas.
# The backslash right after the opening quotes stops the file from starting
# with an empty line, and the encoding is pinned so the bytes written do not
# depend on the platform default.
with open("semicolon.csv", "w", encoding="utf-8") as f:
    f.write("""\
imie;nazwisko;wiek
Jan;Kowalski;25
Anna;Nowak;35
""")

In [None]:
# The fields in semicolon.csv are delimited by ';', so the separator has to be
# passed explicitly.
df = pd.read_csv('semicolon.csv', sep=";")
df

In [None]:
# Create a deliberately malformed file: the first record contains an unquoted
# comma inside the surname, so that row has four fields under a three-column
# header. The malformed row is intentional and is kept as is.
# The backslash right after the opening quotes stops the file from starting
# with an empty line, and the encoding is pinned so the bytes written do not
# depend on the platform default.
with open("broken_because_of_comma.csv", "w", encoding="utf-8") as f:
    f.write("""\
imie,nazwisko,wiek
Jan,Kowalski, Junior,25
Anna,Nowak,35
""")

In [None]:
# Demonstration of the problem: the first data row of this file has four
# comma-separated fields while the header has three, so the parsed values are
# expected not to line up with the column names — inspect the displayed frame.
# sep="," only spells out the comma delimiter explicitly.
df = pd.read_csv('broken_because_of_comma.csv', sep=",")
df

In [None]:
# Create the corrected variant of the previous file: the surname that contains
# a comma is wrapped in double quotes.
# The backslash right after the opening quotes stops the file from starting
# with an empty line, and the encoding is pinned so the bytes written do not
# depend on the platform default.
with open("fixed_because_of_comma.csv", "w", encoding="utf-8") as f:
    f.write("""\
imie,nazwisko,wiek
Jan,"Kowalski, Junior",25
Anna,Nowak,35
""")

In [None]:
# In this file the surname is wrapped in double quotes, so its comma is
# expected to be read as part of the value rather than as a field separator.
df = pd.read_csv('fixed_because_of_comma.csv')
df

## Jak wczytywać daty z pliku CSV?

In [None]:
# Create a file with two date-like columns: a plain date ("data urodzenia",
# quoted in the header) and an ISO-style timestamp ("wizyta").
# The backslash right after the opening quotes stops the file from starting
# with an empty line, and the encoding is pinned so the bytes written do not
# depend on the platform default.
with open("with_dates.csv", "w", encoding="utf-8") as f:
    f.write("""\
imie,nazwisko,wiek,"data urodzenia",wizyta
Jan,Kowalski,25,1985-01-01,2024-01-01T12:00:00
Anna,Nowak,35,1954-02-26,2024-03-07T12:00:00
""")

In [None]:
# Load the file without any date handling; this is the baseline for comparing
# column types once the dates are parsed.
df = pd.read_csv('with_dates.csv')
df

In [None]:
# Print the column summary (names, non-null counts, dtypes) of the current df.
df.info()

In [None]:
# Load the same file again, asking pandas to parse the two listed columns as
# dates while reading.
df = pd.read_csv('with_dates.csv', parse_dates=['data urodzenia', 'wizyta'])
df

In [None]:
# Print the column summary of the current df to check the resulting dtypes.
df.info()

In [None]:
# Take the year component of each birth date through the .dt accessor; this
# relies on "data urodzenia" having been loaded as a datetime column.
df["data urodzenia"].dt.year

## Wczytywanie CSV ze ścieżki 

In [None]:
# Imagine that the data file lives somewhere inside a directory structure.
# DATA_PATH holds the absolute path of the directory that contains the data
# (here simply the current working directory).
DATA_PATH = Path.cwd()

# Load the data file from that directory, parsing both date columns.
df = pd.read_csv(
    DATA_PATH.joinpath("with_dates.csv"),
    parse_dates=['data urodzenia', 'wizyta'],
)
df


## Czym jest TSV?

In [None]:
# Create a tab-separated file; each \t in the literal below is turned into a
# real tab character by Python before the text is written.
# The backslash right after the opening quotes stops the file from starting
# with an empty line, and the encoding is pinned so the bytes written do not
# depend on the platform default.
with open("simple.tsv", "w", encoding="utf-8") as f:
    f.write("""\
imie\tnazwisko\twiek
Jan\tKowalski\t25
Anna\tNowak\t35
""")

In [None]:
# A TSV file is read with the same function as a CSV; only the separator
# changes, here to a tab character.
df = pd.read_csv('simple.tsv', sep="\t")
df