In [None]:
import os
import pandas as pd
import subprocess
from datetime import datetime
from dvc.api import DVCFileSystem

In [None]:
# create working data folder
#
# exist_ok=True keeps this cell re-runnable: without it, os.makedirs raises
# FileExistsError as soon as the folder exists from a previous run.

os.makedirs('/workspace/data/one_file_two_versions', exist_ok=True)

In [None]:
# define paths
#
# local_data_path: absolute folder on disk where data.csv is written.
# dvc_path: location of the same file as opened through the DVC filesystem.

local_data_path = "/workspace/data/one_file_two_versions"
dvc_path = "data/one_file_two_versions/data.csv"

In [None]:
# function to add new version to DVC and Git

def commit_version(rev):
    """Record the current data.csv as a new version in DVC and Git.

    Runs, in order: ``dvc add`` on the data file, ``git add`` on the contents
    of the data folder, ``git commit`` with ``rev`` as the message,
    ``git tag`` with ``rev`` as the tag name, and finally ``dvc push``.

    Args:
        rev: String used both as the commit message and as the Git tag name.

    Raises:
        subprocess.CalledProcessError: If any command exits with a non-zero
            status. Stopping at the first failure avoids creating a tag for a
            commit that does not actually contain the new data version.
    """
    commands = [
        ["dvc", "add", local_data_path + "/data.csv"],
        # No shell is involved, so git receives the "/*" pattern literally
        # as its pathspec rather than a pre-expanded file list.
        ["git", "add", local_data_path + "/*"],
        ["git", "commit", "-m", rev],
        ["git", "tag", rev],
        # ["git", "push", "--atomic", "origin", "dev", rev],  # if you had a remote...
        ["dvc", "push"],
    ]
    for command in commands:
        # check=True turns a failing command into an exception instead of
        # silently continuing with the remaining steps.
        subprocess.run(command, check=True)

In [None]:
# create a dataframe

# Records that make up the first version of the dataset.
data = [
    dict(name='Suzy', age=78, salary=40000),
    dict(name='Bill', age=50, salary=1000),
    dict(name='Fred', age=32, salary=5000),
]
df = pd.DataFrame.from_records(data)

# index=False: the row index is not written to the CSV.
df.to_csv(f"{local_data_path}/data.csv", index=False)

In [None]:
# version the dataframe

# Timestamp with one-second resolution, passed to commit_version as the
# revision label for the first version.
now_v1 = f"{datetime.now():%Y%m%dT%H%M%S}"
commit_version(now_v1)

In [None]:
# modify the original dataframe

# Extra record that distinguishes the second version from the first.
new_data = [dict(name='Bonnie', age=40, salary=84000)]
new_df = pd.DataFrame.from_records(new_data)

# NOTE: df is rebuilt from itself, so re-running this cell appends the row again.
df = pd.concat([df, new_df], ignore_index=True)

# Overwrite the same data.csv so it now holds the extended table.
df.to_csv(f"{local_data_path}/data.csv", index=False)

In [None]:
# version the dataframe again

# Fresh timestamp with one-second resolution, passed to commit_version as the
# revision label for the second version.
now_v2 = f"{datetime.now():%Y%m%dT%H%M%S}"
commit_version(now_v2)

In [None]:
# function to dump dataframe contents from DVC

def print_version(file_rev):
    """Print the CSV stored at ``dvc_path`` as of revision ``file_rev``.

    Opens a DVC filesystem on ``/workspace`` at the given revision, reads the
    file at ``dvc_path`` into a DataFrame and prints it.

    Args:
        file_rev: Revision passed to ``DVCFileSystem`` as ``rev``.
    """
    dvc_fs = DVCFileSystem("/workspace", rev=file_rev)
    with dvc_fs.open(dvc_path) as csv_file:
        print(pd.read_csv(csv_file))

In [None]:
# view the original dataframe version
# (read back at the revision label recorded in now_v1)

print_version(file_rev=now_v1)

In [None]:
# view the updated dataframe version
# (read back at the revision label recorded in now_v2)

print_version(file_rev=now_v2)