# GIT PYTHON

In [1]:
import os
import sys
import pandas as pd
from datetime import datetime, timedelta
from pycovid import pycovidfunc as cv

# Location of the Git command-line client on this (Windows) machine.
git_dir = r"C:\Program Files\Git\cmd"
git_bin = os.path.join(git_dir, "git")

# Tell GitPython which executable to use. The value is assigned through
# os.environ (not os.putenv) because os.putenv changes the process
# environment without updating the os.environ mapping, so code that looks the
# variable up in os.environ would never see it.
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = git_bin

# Make sure the Git directory is the first entry of the executable search
# PATH. The check keeps the cell idempotent: re-running it does not prepend
# the same directory again. Only the process PATH matters for locating
# executables; sys.path (Python's module search path) is left untouched.
current_path = os.environ.get("PATH", "")
if current_path.split(os.pathsep)[0] != git_dir:
    os.environ["PATH"] = (
        git_dir + os.pathsep + current_path if current_path else git_dir
    )

# Only import git now, because that's when the path is checked!
import git

In [2]:
# Read the config file to check for data file information.
# config.csv is indexed by the 'var' column; the rows used below are
# 'who_data_dir', 'git_repo' and 'raw_data' (columns 'path' / 'last_update').

if not os.path.isfile('config.csv'):
    raise FileNotFoundError('No configuration file "config.csv" found.')
config = pd.read_csv('config.csv', index_col='var').fillna('-')

who_data_dir = config.loc['who_data_dir'].path
repo = git.Repo(config.loc['git_repo'].path)

# upstream repository
upstream_repo = repo.remotes.upstream

# Pull upstream base repo and check for modified files:
# snapshot the last-modified information before the pull ...
lm_frame = cv.get_date_modified(who_data_dir)

# ... pull through the repository's own git command wrapper, so the command
# runs inside the repository's working tree regardless of the notebook's
# current directory ...
g = repo.git
g.pull('upstream', 'master')

# ... and snapshot again afterwards.
lm_after_git_pull = cv.get_date_modified(who_data_dir)

# Count the files that existed before the pull and whose last-modified value
# changed. A file that disappeared during the pull is counted as modified
# instead of raising a KeyError.
count_modified = sum(
    idx not in lm_after_git_pull.index
    or lm_frame.loc[idx].last_modified != lm_after_git_pull.loc[idx].last_modified
    for idx in lm_frame.index
)

# Keep only the csv files. Building a new list avoids removing items from
# the list while iterating over it, which silently skips entries.
who_file_list = [name for name in os.listdir(who_data_dir)
                 if name.endswith('.csv')]

# Compare the latest WHO file to the raw data update information
# and calculates the number of files to update:

if count_modified != 0:
    print('{} existing files were updated since last pull'.format(count_modified))
    print('generating new database...')
    try:
        df = cv.raw_data_formatter(who_file_list, who_data_dir)
        print('new database generated succesfully!')
    except Exception as exc:
        # Best effort: keep the notebook running, but report why it failed.
        print('process aborted. No new database generated.')
        print('reason: {!r}'.format(exc))
else:
    last_update = pd.to_datetime(config.loc['raw_data'].last_update)

    # File names encode the report date as MM-DD-YYYY (the same format used
    # to build the new file names below), so neither the directory listing
    # order nor a lexical sort is chronological across years. Parse the names
    # and take the maximum; names that are not dates are ignored.
    report_dates = pd.to_datetime(
        [os.path.splitext(name)[0] for name in who_file_list],
        format='%m-%d-%Y', errors='coerce').dropna()

    if report_dates.empty:
        files_to_update = 0
    else:
        latest_who_file_date = report_dates.max()
        files_to_update = (latest_who_file_date - last_update).days

    # Generating the list of new files to update the database.
    # A non-positive difference means there is nothing newer than last_update.
    if files_to_update > 0:
        new_dates = [
            (last_update + timedelta(days=offset)).strftime('%m-%d-%Y')
            for offset in range(1, files_to_update + 1)
        ]
        list_of_new_files = [day + '.csv' for day in new_dates]
        new_date = new_dates[-1]

        # Generating a dataframe with new information:
        df = cv.raw_data_formatter(list_of_new_files, who_data_dir)

        # Appending the new data to existing raw data file and updating
        # the raw data information in the config file:

        raw_data_path = config.loc['raw_data'].path
        df.to_csv(raw_data_path, mode='a', index=False, header=False)

        # Single .loc[row, column] assignment: a chained
        # config.loc[row].column = value may write to a temporary copy.
        config.loc['raw_data', 'last_update'] = new_date
        config.to_csv('config.csv')

        print('No existing files were updated')
        print('%d new files found. All files appended into the raw data file'
              % (files_to_update))
    else:
        print('0 new files found. No further action necessary')

2 new files found. All files appended into the raw data file


In [3]:
# Show the names of the files reported by `git diff --name-only`.
changed_files = repo.git.diff(None, name_only=True)
print(changed_files)

consolidated_data/raw_data.csv
python/notebooks/.ipynb_checkpoints/github_python_interface-checkpoint.ipynb
python/notebooks/config.csv
python/notebooks/github_python_interface.ipynb


In [5]:
# Commit through the project helper.
# NOTE(review): what gets staged/pushed is decided inside pycovid's
# commit_to_repo — confirm there.
commit_message = 'integrating gitpython with raw data treatment'
cv.commit_to_repo(repo, message=commit_message)

Commit process succesfull


In [6]:
# Print a summary of the repository through the project helper; the captured
# output lists the local path, description, active branch, remotes and the
# last commit.
cv.repo_info(repo)

Repo at None successfully loaded.
Repo local path: C:\Users\user\Documents\GitHub\COVID-19
Repo description: covid19 data analysis git project
Repo active branch: master
Remote named "origin" with URL "https://github.com/psychopresley/COVID-19.git"
Remote named "upstream" with URL "https://github.com/CSSEGISandData/COVID-19.git"
Last commit for repo: a06d341c4c692f4777f2dd3ea1c4aac961ec3281.
----
commit: a06d341c4c692f4777f2dd3ea1c4aac961ec3281
"testing gitpython automated process 07-17-2020" by psychopresley (psychopresley@gmail.com)
2020-07-17 17:50:49-03:00
count: 1366 and size: 272


In [5]:
# Keep a handle on the commit object that HEAD currently points to.
headcommit = repo.head.commit