In [1]:
import numpy as np
import polars as pl
import glob
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

* ## The following will require ~750 GB of hard drive space and ~400 GB of RAM/Swap memory.

* ## Please download all [TIC files](https://archive.stsci.edu/tess/tic_ctl.html) into a subdirectory named `tic` and all [Gaia dr2_neighbourhood files](https://cdn.gea.esac.esa.int/Gaia/gedr3/auxiliary/dr2_neighbourhood/) into a subdirectory named `gaia`.

## Create one large TIC file with TIC ID and Gaia DR2 ID

In [2]:
# Read two columns from every compressed TIC file and stack them into one
# frame whose columns are named "TIC" and "dr2_source_id".

# Specify the directory and the positional indices of the columns to read
tic_directory = 'tic/'
columns = [0, 8]

# Get a list of all csv.gz files in the directory.
# glob returns paths in arbitrary, filesystem-dependent order, so sort them
# to make the row order of the concatenated frame reproducible.
files = sorted(glob.glob(tic_directory + 'tic_dec*.csv.gz'))

# Fail early with a clear message instead of letting pl.concat choke on an
# empty list after the (long) loop has silently done nothing.
if not files:
    raise FileNotFoundError(
        "No files matching 'tic_dec*.csv.gz' were found in '" + tic_directory + "'"
    )

# Create an empty list to store the dataframes
dfs = []

# Loop over the files and read them into dataframes.
# The files are read without a header row, so the two selected columns are
# renamed and both are parsed as 64-bit integers.
for file in tqdm(files):
    df = pl.read_csv(
        file,
        columns=columns,
        has_header=False,
        new_columns=["TIC", "dr2_source_id"],
        dtypes={"TIC": pl.Int64, "dr2_source_id": pl.Int64},
    )
    dfs.append(df)

# Concatenate all dataframes into one
result = pl.concat(dfs, how="vertical")

result

100%|███████████████████████████████████████████| 90/90 [47:53<00:00, 31.92s/it]


TIC,dr2_source_id
i64,i64
649490745,567863400304597120
649521389,568631512255721984
649522563,568744040398246656
649522587,568744727593344896
649489483,567840482358622080
…,…
2041172943,1926286857958953728
2041173966,1926301426485931392
2041174354,1926307370720624256
2041162198,1926126874720155264


#### Sort the dataframe by Gaia DR2 ID and drop TIC targets without a Gaia DR2 ID

In [3]:
# Drop rows containing nulls first, then sort the remaining rows by Gaia DR2 ID.
# Dropping before sorting yields the same set of rows but avoids sorting
# rows that are discarded immediately afterwards.
tic = result.drop_nulls().sort("dr2_source_id")
tic

TIC,dr2_source_id
i64,i64
439902055,4295806720
439902052,34361129088
439902053,38655544960
439902049,309238066432
439902048,343597448960
…,…
1997435056,6917528963217645568
248849678,6917528993281571840
1997435057,6917528993281819008
1997435058,6917528993283204480


#### Save the TIC file!

In [4]:
# Persist the sorted TIC table as a comma-separated file inside the TIC directory.
tic.write_csv(
    file=tic_directory + "tic.csv",
    separator=",",
)

## Create one large Gaia DR2/DR3 file

In [5]:
# Read every compressed Gaia DR2-neighbourhood file, apply the quality cuts,
# and stack the surviving rows into one frame.

# Specify the Gaia file directory
gaia_directory = 'gaia/'

# Quality-cut thresholds: a row is kept only if it satisfies BOTH limits.
MAX_ANGULAR_DISTANCE = 100       # upper limit on angular_distance (mas)
MAX_ABS_MAGNITUDE_DIFF = 0.2     # upper limit on |magnitude_difference| (G-band mag)

# Get a list of all csv.gz files in the directory.
# glob returns paths in arbitrary, filesystem-dependent order, so sort them
# to make the row order of the concatenated frame reproducible.
files = sorted(glob.glob(gaia_directory + 'Dr2Neighbourhood_*.csv.gz'))

# Fail early with a clear message instead of letting pl.concat choke on an
# empty list.
if not files:
    raise FileNotFoundError(
        "No files matching 'Dr2Neighbourhood_*.csv.gz' were found in '" + gaia_directory + "'"
    )

# Keep only matches closer than the distance limit whose absolute magnitude
# difference is below the magnitude limit; a row failing either test is removed.
quality_cut = (
    (pl.col("angular_distance") < MAX_ANGULAR_DISTANCE)
    & (pl.col("magnitude_difference").abs() < MAX_ABS_MAGNITUDE_DIFF)
)

# Create an empty list to store the dataframes
dfs = []

# Loop over the files, read them into dataframes, and perform the quality cuts
for file in tqdm(files):
    df = pl.read_csv(file, has_header=True, dtypes={"dr2_source_id": pl.Int64, "dr3_source_id": pl.Int64})

    # Drop the proper motion propagation column, then apply both cuts at once
    df = df.drop("proper_motion_propagation").filter(quality_cut)

    dfs.append(df)

# Concatenate all dataframes into one
result = pl.concat(dfs, how="vertical")

result

100%|█████████████████████████████████████████| 783/783 [06:24<00:00,  2.04it/s]


dr2_source_id,dr3_source_id,angular_distance,magnitude_difference
i64,i64,f64,f64
4341712624834004992,4341712624834004992,0.045163,-0.059719
3477842384888461312,3477842384888461312,0.466344,-0.111376
6349707584874251776,6349707584874251776,0.163466,-0.030685
4210285968450021888,4210285972751447040,0.093154,-0.021759
5859213768395513344,5859213768395513344,13.680017,-0.124718
…,…,…,…
6033100879525167744,6033100879525167744,0.091092,-0.011508
6029172633773262976,6029172633773262976,11.851263,-0.114599
4148884153918319744,4148884153918319744,0.1571634,-0.033903
6239338332800607744,6239338332800607744,0.4106341,-0.020346


#### Sort the dataframe by Gaia DR2 ID

In [6]:
# Sort by Gaia DR2 ID
gaia = result.sort("dr2_source_id")
gaia

dr2_source_id,dr3_source_id,angular_distance,magnitude_difference
i64,i64,f64,f64
4295806720,4295806720,0.139224,-0.012514
34361129088,34361129088,0.234338,-0.032408
38655544960,38655544960,0.074779,-0.012037
309238066432,309238066432,0.148353,-0.024946
343597448960,343597448960,0.1402964,-0.013454
…,…,…,…
6917528963217645568,6917528963217645568,2.2794328,-0.050938
6917528993281571840,6917528993281571840,0.316184,-0.014524
6917528993281819008,6917528993281819008,0.6179533,-0.033461
6917528993283204480,6917528993283204480,1.417878,-0.040546


#### Save the Gaia file!

In [7]:
# Persist the sorted Gaia table as a comma-separated file inside the Gaia directory.
gaia.write_csv(
    file=gaia_directory + "gaia.csv",
    separator=",",
)

## Merge the TIC and Gaia files

In [8]:
# Inner-join the TIC table with the Gaia table on the shared "dr2_source_id"
# column, so only rows whose ID is present in both tables survive.
ticgaia = tic.join(
    other=gaia,
    how="inner",
    on="dr2_source_id",
)
ticgaia

TIC,dr2_source_id,dr3_source_id,angular_distance,magnitude_difference
i64,i64,i64,f64,f64
439902055,4295806720,4295806720,0.139224,-0.012514
439902052,34361129088,34361129088,0.234338,-0.032408
439902053,38655544960,38655544960,0.074779,-0.012037
439902049,309238066432,309238066432,0.148353,-0.024946
439902048,343597448960,343597448960,0.1402964,-0.013454
…,…,…,…,…
1997435056,6917528963217645568,6917528963217645568,2.2794328,-0.050938
248849678,6917528993281571840,6917528993281571840,0.316184,-0.014524
1997435057,6917528993281819008,6917528993281819008,0.6179533,-0.033461
1997435058,6917528993283204480,6917528993283204480,1.417878,-0.040546


#### Sort the merged dataframe by TIC ID

In [9]:
# Re-order the merged table by ascending "TIC" and display it.
ticgaia = ticgaia.sort(by="TIC")
ticgaia

TIC,dr2_source_id,dr3_source_id,angular_distance,magnitude_difference
i64,i64,i64,f64,f64
1,6220232982534277760,6220232982534277760,0.031861,-0.012459
2,6220232913814800640,6220232913814800640,0.074949,-0.027599
3,6220232948174541696,6220232948174541696,0.063944,-0.010159
4,6220232948174542464,6220232948174542464,0.03279,-0.017881
5,6220233016894020352,6220233016894020352,0.1054018,-0.0109
…,…,…,…,…
2056111150,6627816234937364352,6627816234937364352,0.5947274,-0.051826
2056111151,6627816445390870656,6627816445390870656,3.8710597,-0.087215
2056111152,6627819258594345984,6627819258594345984,0.206485,-0.017647
2056111153,6627819297249758976,6627819297250043520,0.060031,-0.024467


#### Save the final file!

In [10]:
# Persist the merged, TIC-sorted table as a comma-separated file in the working directory.
ticgaia.write_csv(
    file="ticdr2dr3.csv",
    separator=",",
)

#### **Bonus**: Identify which targets have multiple Gaia DR2/DR3 matches

In [11]:
# Keep every row whose "TIC" value occurs more than once in the merged table.
ticgaiadup = ticgaia.filter(
    pl.col("TIC").is_duplicated()
)
ticgaiadup

TIC,dr2_source_id,dr3_source_id,angular_distance,magnitude_difference
i64,i64,i64,f64,f64
28294725,104991441918953984,104991441918953984,99.838585,-0.121298
28294725,104991441918953984,104991437623507200,97.942406,0.164927
33136230,4203641619689111168,4203641624045956864,88.82475,0.094122
33136230,4203641619689111168,4203641624045956736,98.77191,0.107105
203125275,4060983353237314176,4060983357525193216,97.750534,0.011034
…,…,…,…,…
1864487842,2031173395317380992,2031173395376312320,88.33441,0.150446
1866512827,2032067779335099008,2032067779335099008,88.09891,-0.10202
1866512827,2032067779335099008,2032067779335099136,91.134575,0.190321
1921419052,4515598052346828160,4515598052346828416,92.581635,-0.034456


In [12]:
# Keep every row whose "dr2_source_id" value occurs more than once in the merged table.
ticgaia2dup = ticgaia.filter(
    pl.col("dr2_source_id").is_duplicated()
)
ticgaia2dup

TIC,dr2_source_id,dr3_source_id,angular_distance,magnitude_difference
i64,i64,i64,f64,f64
42,6220282902937913984,6220282907234553472,0.9073327,0.150242
608,6222018280179964928,6222018280179964928,0.159115,-0.029999
624,6222019826368197888,6222019826368197888,0.05881,-0.009862
954,6222119843271118720,6222119843271118720,0.177956,-0.028215
1037,6222123966438809216,6222123966438809216,1.969019,0.056524
…,…,…,…,…
2056110706,6624775952143225728,6624775952143225728,0.124918,-0.016433
2056110755,6624779388117042432,6624779388117042432,0.050214,-0.014225
2056110756,6624779418181868160,6624779418181868160,0.60752,-0.006236
2056110758,6624779452541688064,6624779452541688064,0.063082,-0.02961


In [13]:
# Keep every row whose "dr3_source_id" value occurs more than once in the merged table.
ticgaia3dup = ticgaia.filter(
    pl.col("dr3_source_id").is_duplicated()
)
ticgaia3dup

TIC,dr2_source_id,dr3_source_id,angular_distance,magnitude_difference
i64,i64,i64,f64,f64
42,6220282902937913984,6220282907234553472,0.9073327,0.150242
608,6222018280179964928,6222018280179964928,0.159115,-0.029999
624,6222019826368197888,6222019826368197888,0.05881,-0.009862
954,6222119843271118720,6222119843271118720,0.177956,-0.028215
1037,6222123966438809216,6222123966438809216,1.969019,0.056524
…,…,…,…,…
2056110706,6624775952143225728,6624775952143225728,0.124918,-0.016433
2056110755,6624779388117042432,6624779388117042432,0.050214,-0.014225
2056110756,6624779418181868160,6624779418181868160,0.60752,-0.006236
2056110758,6624779452541688064,6624779452541688064,0.063082,-0.02961


In [14]:
# Order the repeated-DR2-ID rows by ascending "dr2_source_id" so rows sharing an ID sit next to each other.
ticgaia2dup = ticgaia2dup.sort(by="dr2_source_id")
ticgaia2dup

TIC,dr2_source_id,dr3_source_id,angular_distance,magnitude_difference
i64,i64,i64,f64,f64
347548683,8250632981888,8250632981888,0.118059,-0.013992
640000055,8250632981888,8250632981888,0.118059,-0.013992
347570976,17390323414400,17390323414400,0.154151,-0.020091
640000131,17390323414400,17390323414400,0.154151,-0.020091
347548475,25980257976960,25980257976960,35.415405,0.094322
…,…,…,…,…
1997433643,6917506590732371456,6917506590732371456,0.321679,-0.023991
248849874,6917520442001631104,6917520442001631104,0.128425,-0.016129
1997434506,6917520442001631104,6917520442001631104,0.128425,-0.016129
248849771,6917525591669159424,6917525591669159424,0.074064,-0.022373
