# Shuffle months in date trait

I would like to run a control in which I shuffle the month of every date that has a known month, and then re-run the analysis to see whether that alters the inferred time of cross-species transmission.

In [49]:
import sys, subprocess, glob, os, shutil, re, importlib, Bio, csv
from subprocess import call
from Bio import SeqIO
import pandas as pd
import numpy as np
from time import gmtime, strftime
import random 

import datetime
from datetime import datetime
from dateutil.parser import parse
import rpy2
%load_ext rpy2.ipython

In [56]:
def convert_calendar_date_to_decimal_date(date_string):
    """Convert a ``year-month-day`` calendar date to a decimal (fractional) year.

    The fractional part is the number of whole days elapsed since 1 January
    of that year divided by the number of days in that year (365 or 366), so
    1 January maps to exactly ``year``.

    The value is computed directly in Python at full float precision rather
    than being recovered by stripping ``"[1] "`` from the printed form of an
    R vector, which made the result depend on R's print formatting.

    Args:
        date_string: date written as three ``-``-separated integers,
            e.g. ``"2015-03-28"``.

    Returns:
        The date as a float, e.g. ``2015.0`` for ``"2015-01-01"``.

    Raises:
        ValueError: if ``date_string`` does not split into exactly three
            integers or does not name a real calendar date.
    """
    # Imported locally: this notebook rebinds the name `datetime` to the
    # class, so the module-level name cannot be relied on here.
    from datetime import date

    year_text, month_text, day_text = str(date_string).strip().split("-")
    target_day = date(int(year_text), int(month_text), int(day_text))

    first_day = date(target_day.year, 1, 1)
    last_day = date(target_day.year, 12, 31)

    days_elapsed = (target_day - first_day).days
    days_in_year = (last_day - first_day).days + 1  # 365, or 366 in leap years

    decimal_date = target_day.year + float(days_elapsed) / days_in_year

    return decimal_date

In [125]:
def write_new_xml(input_xml, output_xml):
    """Copy ``input_xml`` to ``output_xml`` with the date-trait values re-drawn.

    Every line is copied verbatim except lines that start with the date-trait
    opening tag (``<trait id="dateTrait.t`` preceded by twelve spaces). On
    those lines the comma-separated contents of the ``value="..."`` attribute
    are handed to ``return_shuffled_dates_list`` and the attribute is
    rewritten with the result; the text before and after the attribute is
    kept exactly as it was.

    Args:
        input_xml: path of the XML file to read.
        output_xml: path of the XML file to write (overwritten if present).

    Raises:
        ValueError: if a date-trait line has no complete ``value="..."``
            attribute on that same line.
    """
    trait_line_start = "            <trait id=\"dateTrait.t"
    value_open = "value=\""

    # Open each file once instead of re-opening the output for every line.
    with open(input_xml, "r") as infile, open(output_xml, "w") as outfile:
        for line in infile:
            if not line.startswith(trait_line_start):
                outfile.write(line)
                continue

            attr_start = line.find(value_open)
            value_start = attr_start + len(value_open)
            value_end = line.find("\"", value_start)
            if attr_start == -1 or value_end == -1:
                raise ValueError(
                    "date trait line has no complete value=\"...\" attribute: "
                    + line.rstrip("\n")
                )

            # Slice strictly between the quotes so the closing '">' and the
            # newline never become part of the last date entry.
            dates_list = line[value_start:value_end].split(",")

            # return_shuffled_dates_list hands back the new value already
            # wrapped in double quotes.
            shuffled_dates = return_shuffled_dates_list(dates_list)

            # Keep whatever followed the attribute (normally ">\n").
            new_date_line = (
                line[:attr_start] + "value=" + shuffled_dates + line[value_end + 1:]
            )
            outfile.write(new_date_line)

In [118]:
def return_shuffled_dates_list(date_list):
    """Build the new date-trait ``value`` string with months re-drawn.

    Each entry is expected to have the form ``<taxon label>=<assigned date>``,
    where the taxon label is ``|``-separated and its third field is a
    ``year-month-day`` calendar date. Entries whose month is ``XX`` are kept
    exactly as given. Every other entry gets a new calendar date from
    ``return_random_date``, converted to a decimal date with
    ``convert_calendar_date_to_decimal_date``.

    Args:
        date_list: list of ``label=date`` strings.

    Returns:
        One string (not a list, despite the function name): the rewritten
        entries joined by commas and wrapped in double quotes.

    Raises:
        IndexError: if an entry has no ``=``, its label has fewer than three
            ``|`` fields, or its date has fewer than three ``-`` parts.
    """
    shuffled_date_list = []

    for sequence in date_list:
        label_and_date = sequence.split("=")
        taxon_label = label_and_date[0]
        assigned_date = label_and_date[1]

        # Read the calendar date from the label only, so that a date sitting
        # in the last "|" field does not drag "=<assigned date>" into the day.
        date_parts = taxon_label.split("|")[2].split("-")
        year = date_parts[0]
        month = date_parts[1]
        day = date_parts[2]

        # if there is a specific month, choose a new, random month
        if month != "XX":
            new_long_date = return_random_date(year, month, day)
            new_decimal_date = str(convert_calendar_date_to_decimal_date(new_long_date))
        else:
            # unknown month: leave the assigned date untouched
            new_decimal_date = assigned_date

        shuffled_date_list.append(taxon_label + "=" + new_decimal_date)

    # join into a single quoted attribute value
    shuffled_dates = "\"" + ",".join(shuffled_date_list) + "\""

    return shuffled_dates

In [119]:
def return_random_date(year, month, day):
    """Return ``year-MM-DD`` with the month drawn uniformly at random.

    The original ``month`` is not consulted, so the drawn month may be the
    same as the original one.

    Args:
        year: year as a string; copied through unchanged.
        month: original month; accepted but not used.
        day: day as a string, or ``"XX"`` when the day is unknown.

    Returns:
        The new date as a ``year-month-day`` string.
    """
    month_choices = ['01', '02', '03', '04', '05', '06',
                     '07', '08', '09', '10', '11', '12']
    new_month = str(random.choice(month_choices))

    # Days 29-31 do not exist in every month, so they are pulled back to 28;
    # an unknown day ("XX") becomes the 1st; any other day is kept as is.
    day_replacements = {"29": "28", "30": "28", "31": "28", "XX": "01"}
    new_day = day_replacements.get(day, day)

    return "-".join([year, new_month, new_day])

In [130]:
# XML file that is read as the input; it is passed to write_new_xml as input_xml
template_xml = "beast-runs/2021-12-21-mascot-3deme-skyline-shuffled-months/2021-07-30-mascot-3deme-skyline-tipdates.xml"
# base output path; an "-it<i>" tag is inserted before ".xml" to name each iteration's file
output_xml = "beast-runs/2021-12-21-mascot-3deme-skyline-shuffled-months/2021-12-21-mascot-3deme-skyline-shuffled-months.xml"

In [131]:
# Write 10 copies of the template, each produced by a separate call to
# write_new_xml, named "<output_xml without extension>-it<i><extension>".
# splitext only touches the final extension, so a ".xml" elsewhere in the path
# is left alone and a different extension still yields distinct file names.
output_stem, output_ext = os.path.splitext(output_xml)

for i in range(10):
    new_xml = output_stem + "-it" + str(i) + output_ext
    write_new_xml(template_xml, new_xml)