In [None]:
# Report for the comms team and for the Highlights tab to be sent to LEIDs
# Before running this script, download the basic publication report file from RPS and save it as 'publications.csv' in the working directory.
# Filter the file for publications with a blank 'Reporting date 1'.
# Check these publications and fill the publication date in manually on RPS if possible. 

import pandas as pd
import numpy as np
from datetime import datetime

def prepare_dataframe(csv_path="publications.csv"):
    """Load the publications export and keep only upcoming publications.

    Parameters
    ----------
    csv_path : str, optional
        Path of the CSV export to read. It must contain an 'ID' column (used
        as the index), the 'Reporting date 1' and 'Online publication date'
        columns, and the report columns selected below.

    Returns
    -------
    pandas.DataFrame
        Rows whose 'Publication_Date' is today or later, restricted to the
        report columns and sorted by 'Publication_Date' in ascending order.
    """
    df = pd.read_csv(csv_path, index_col="ID")
    # Both date columns are parsed day-first (day-month-year).
    df['Reporting date 1'] = pd.to_datetime(df['Reporting date 1'], dayfirst=True)
    df['Online publication date'] = pd.to_datetime(df['Online publication date'], dayfirst=True)
    reporting = df['Reporting date 1']
    online = df['Online publication date']
    # Take the online date when it is earlier than the reporting date.
    # A blank reporting date makes the comparison False, so it is handled
    # explicitly: fall back to the online date instead of producing NaT,
    # which would silently drop the row in the filter below.
    use_online = (online < reporting) | reporting.isna()
    df['Publication_Date'] = np.where(use_online, online, reporting)
    # Keep only publications dated today (midnight) or later.
    today = pd.Timestamp('today').floor('D')
    report_columns = ['Publication type', 'Publication_Date', 'Title',
                      'Authors OR Creators', 'Editors OR Supervisors']
    upcoming = df.loc[df['Publication_Date'] >= today, report_columns]
    # Sort rows by date
    return upcoming.sort_values(by=['Publication_Date'])


def save_upcomingpubs(df):
    """Write the report to '<YYYY_MM_DD>_publications.xlsx' in the working directory.

    The sheet is named 'Publications', its top row is frozen, and each data
    column is widened to fit its longest value or its header.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to export; its index is written as the first sheet column.
    """
    # Today's date goes in the file name
    datestring = datetime.now().strftime('%Y_%m_%d')
    # set_column() is an XlsxWriter worksheet method, so request that engine
    # explicitly. The with-block saves and closes the workbook even if an
    # error occurs; ExcelWriter.save() was removed in pandas 2.0.
    with pd.ExcelWriter(datestring + '_publications.xlsx', engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Publications', freeze_panes=(1, 0))
        worksheet = writer.sheets['Publications']
        # Sheet column 0 holds the index, so data columns start at 1.
        for position, column in enumerate(df.columns, start=1):
            cell_lengths = df.iloc[:, position - 1].astype(str).map(len)
            # An empty frame has no cells to measure; .max() would be NaN.
            longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
            column_width = max(longest_cell, len(str(column)))
            worksheet.set_column(position, position, column_width + 1)


def main():
    """Build the upcoming-publications table and export it to Excel."""
    save_upcomingpubs(prepare_dataframe())


# Produce the report only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

#Run a quick check to ensure it's correct

In [11]:
# Report for scheduling tweets
# Before running this script, download the basic publication report file from RPS and save it as 'publicationsbyuser.csv' (the file name this cell reads).
# Filter the file for publications with a blank 'Reporting date 1'.
# Check these publications and fill the publication date in manually on RPS if possible. 

import pandas as pd
import numpy as np
from datetime import datetime

def prepare_dataframe(csv_path="publicationsbyuser.csv", days_back=31):
    """Load the per-user publications export and keep recent publications.

    Parameters
    ----------
    csv_path : str, optional
        Path of the CSV export to read. It must contain an 'ID' column (used
        as the index), the 'Reporting date 1' and 'Online publication date'
        columns, and the report columns selected below.
    days_back : int, optional
        Size of the look-back window in days, counted from today's midnight.

    Returns
    -------
    pandas.DataFrame
        Rows whose 'Publication_Date' is on or after the cutoff, restricted
        to the report columns and sorted by 'Publication_Date' ascending.
    """
    df = pd.read_csv(csv_path, index_col="ID")
    # Both date columns are parsed day-first (day-month-year).
    df['Reporting date 1'] = pd.to_datetime(df['Reporting date 1'], dayfirst=True)
    df['Online publication date'] = pd.to_datetime(df['Online publication date'], dayfirst=True)
    reporting = df['Reporting date 1']
    online = df['Online publication date']
    # Take the online date when it is earlier than the reporting date.
    # A blank reporting date makes the comparison False, so it is handled
    # explicitly: fall back to the online date instead of producing NaT,
    # which would silently drop the row in the filter below.
    use_online = (online < reporting) | reporting.isna()
    df['Publication_Date'] = np.where(use_online, online, reporting)
    # Keep publications dated within the last `days_back` days (or later).
    cutoff = pd.Timestamp('today').floor('D') - pd.Timedelta(days=days_back)
    report_columns = ['Publication type', 'Publication_Date', 'Email', 'Title',
                      'Editors OR Supervisors']
    recent = df.loc[df['Publication_Date'] >= cutoff, report_columns]
    # Sort rows by date
    return recent.sort_values(by=['Publication_Date'])


def save_recentpubsbyuser(df):
    """Write the report to '<YYYY_MM_DD>_pubsbyuser.xlsx' in the working directory.

    The sheet is named 'Publications', its top row is frozen, and each data
    column is widened to fit its longest value or its header.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to export; its index is written as the first sheet column.
    """
    # Today's date goes in the file name
    datestring = datetime.now().strftime('%Y_%m_%d')
    # set_column() is an XlsxWriter worksheet method, so request that engine
    # explicitly. The with-block saves and closes the workbook even if an
    # error occurs; ExcelWriter.save() was removed in pandas 2.0.
    with pd.ExcelWriter(datestring + '_pubsbyuser.xlsx', engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Publications', freeze_panes=(1, 0))
        worksheet = writer.sheets['Publications']
        # Sheet column 0 holds the index, so data columns start at 1.
        for position, column in enumerate(df.columns, start=1):
            cell_lengths = df.iloc[:, position - 1].astype(str).map(len)
            # An empty frame has no cells to measure; .max() would be NaN.
            longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
            column_width = max(longest_cell, len(str(column)))
            worksheet.set_column(position, position, column_width + 1)


def main():
    """Build the recent-publications-by-user table and export it to Excel."""
    save_recentpubsbyuser(prepare_dataframe())


# Produce the report only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

#Run a quick check to ensure it's correct

  exec(code_obj, self.user_global_ns, self.user_ns)


In [22]:
# Publications list for Staff, Publications and Projects report
# Before running this script, download the basic publication report file from RPS and save it as 'publications.csv' in the working directory.
# Filter the file for publications with a blank 'Reporting date 1'.
# Check these publications and fill the publication date in manually on RPS if possible. 

import pandas as pd
import numpy as np
from datetime import datetime

def prepare_dataframe(csv_path="publications.csv"):
    """Load the publications export and add a sortable 'Publication_Date'.

    Parameters
    ----------
    csv_path : str, optional
        Path of the CSV export to read. It must contain an 'ID' column (used
        as the index) and the 'Reporting date 1' and 'Online publication
        date' columns.

    Returns
    -------
    pandas.DataFrame
        Every row and column of the export plus 'Publication_Date', sorted by
        that column in ascending order.
    """
    df = pd.read_csv(csv_path, index_col="ID")
    # Both date columns are parsed day-first (day-month-year).
    df['Reporting date 1'] = pd.to_datetime(df['Reporting date 1'], dayfirst=True)
    df['Online publication date'] = pd.to_datetime(df['Online publication date'], dayfirst=True)
    reporting = df['Reporting date 1']
    online = df['Online publication date']
    # Take the online date when it is earlier than the reporting date.
    # A blank reporting date makes the comparison False, so it is handled
    # explicitly: fall back to the online date instead of leaving the
    # publication undated.
    use_online = (online < reporting) | reporting.isna()
    df['Publication_Date'] = np.where(use_online, online, reporting)
    # Sort rows by date
    return df.sort_values(by=['Publication_Date'])


def save_publicationslist(df):
    """Write the publications list to '<YYYY_MM_DD>_publications.xlsx'.

    Only the report columns listed below are exported, in that order. The
    sheet is named 'Publications', its top row is frozen, and each exported
    column is widened to fit its longest value or its header.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least the report columns; its index is written
        as the first sheet column.
    """
    # Today's date goes in the file name
    datestring = datetime.now().strftime('%Y_%m_%d')
    report_columns = ['Authors OR Creators', 'Editors OR Supervisors', 'Publication_Date',
                      'Publication type', 'Published proceedings OR Journal', 'Title']
    # Select the exported columns up front so the widths below are measured
    # on, and applied to, the columns as they are actually laid out in the
    # sheet rather than on the positions of the unfiltered frame.
    export = df[report_columns]
    # set_column() is an XlsxWriter worksheet method, so request that engine
    # explicitly. The with-block saves and closes the workbook even if an
    # error occurs; ExcelWriter.save() was removed in pandas 2.0.
    with pd.ExcelWriter(datestring + '_publications.xlsx', engine='xlsxwriter') as writer:
        export.to_excel(writer, sheet_name='Publications', freeze_panes=(1, 0))
        worksheet = writer.sheets['Publications']
        # Sheet column 0 holds the index, so data columns start at 1.
        for position, column in enumerate(export.columns, start=1):
            cell_lengths = export.iloc[:, position - 1].astype(str).map(len)
            # An empty frame has no cells to measure; .max() would be NaN.
            longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
            column_width = max(longest_cell, len(str(column)))
            worksheet.set_column(position, position, column_width + 1)


def main():
    """Build the dated publications list and export it to Excel."""
    save_publicationslist(prepare_dataframe())


# Produce the report only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

#Run a quick check to ensure it's correct

  exec(code_obj, self.user_global_ns, self.user_ns)
