In [267]:
from google.cloud import bigquery
from google.cloud.bigquery import job
from google.cloud.bigquery import SchemaField
import pandas as pd
import datetime as dt
import numpy as np
import re
import os
from tqdm import tqdm
import pandas as pd
import copy
# GCP project id used for the BigQuery client below.
# NOTE(review): `PROJCECT` looks like a misspelling of `PROJECT`; the name is kept
# as-is because other cells may reference it.
PROJCECT = 'ballosodeuk'
# BigQuery client bound explicitly to that project.
bq = bigquery.Client(project=PROJCECT)

In [268]:
# Client setup (no project is passed, so the environment's default is used).
client = bigquery.Client()
query_name = "0711_동적쿼리테스트"
# Read the SQL text for the query.
# file = './query/쿠팡0403_0509.sql'
file = f'../query/{query_name}.sql'
# The handle gets its own name so `file` keeps holding the path instead of being
# rebound to a closed file object. The encoding is explicit so the result does
# not depend on the platform default (assumes the .sql files are saved as UTF-8).
with open(file, 'r', encoding='utf-8') as sql_file:
    query = sql_file.read()


In [468]:

# Run the query with a default job configuration.
job_config = bigquery.QueryJobConfig()
query_job = client.query(query, job_config=job_config)

# Materialize the query result as a pandas DataFrame.
df = query_job.to_dataframe()

In [271]:
# Keep an independent deep copy of the query result so `df` can be restored later.
df_copy = copy.deepcopy(df)

### 1. 신규 DF의 컬럼 추론
- 새로운 컬럼 생성 대응
- 기존 스키마 Order 로 정렬

In [272]:
def infer_schema_from_dataframe(df):
    """Infer a nested schema dictionary from the values held in a DataFrame.

    Each column maps either to a type name (``"INTEGER"``, ``"FLOAT"``,
    ``"BOOLEAN"``, ``"TIMESTAMP"``, ``"DATE"``, ``"STRING"``) or, for dicts and
    lists/arrays, to a descriptor ``{"type": ..., "mode": ..., "fields": {...}}``.

    Missing values (``None``, ``pd.NaT``, ``pd.NA``) and empty lists/arrays carry
    no type information. They are treated as "unknown" while scanning, so a null
    in an early row cannot lock a field to ``"STRING"``; the first informative
    value decides the type. Fields that stay unknown for every row fall back to
    ``"STRING"``. For a list/array of dicts, every dict element contributes
    fields, not only the first one.

    Args:
        df: DataFrame whose cells are scalars, dicts, or lists/arrays (possibly
            nested).

    Returns:
        dict keyed by column name, in column order. An empty DataFrame (no
        rows) yields an empty dict.
    """
    def is_missing(value):
        # Identity checks only, so arrays and dicts are never compared by value.
        return value is None or value is pd.NaT or value is pd.NA

    def merge_inferred(known, seen):
        # ``None`` means "no information yet": whichever side knows more wins.
        if known is None:
            return seen
        if seen is None:
            return known
        if isinstance(known, dict) and isinstance(seen, dict):
            merged = dict(known)
            for key, seen_value in seen.items():
                if key in known:
                    merged[key] = merge_inferred(known[key], seen_value)
                else:
                    merged[key] = seen_value
            return merged
        # Conflicting concrete types: the first one seen is kept.
        return known

    def infer_record_fields(record):
        return {key: infer_field_type(item) for key, item in record.items()}

    def infer_field_type(value):
        if is_missing(value):
            return None
        if isinstance(value, dict):
            return {"type": "RECORD", "fields": infer_record_fields(value)}
        if isinstance(value, (np.ndarray, list)):
            if len(value) == 0:
                # Repeated, but the element type is not known from this value.
                return {"type": None, "mode": "REPEATED"}
            if isinstance(value[0], dict):
                subfields = {}
                for item in value:
                    if isinstance(item, dict):
                        subfields = merge_inferred(subfields, infer_record_fields(item))
                return {"type": "RECORD", "mode": "REPEATED", "fields": subfields}
            return {"type": "STRING", "mode": "REPEATED"}
        value_type = type(value)
        if pd.api.types.is_integer_dtype(value_type):
            return "INTEGER"
        if pd.api.types.is_float_dtype(value_type):
            return "FLOAT"
        if pd.api.types.is_bool_dtype(value_type):
            return "BOOLEAN"
        if pd.api.types.is_datetime64_any_dtype(value_type):
            return "TIMESTAMP"
        if isinstance(value, pd.Timestamp):
            return "DATE"
        return "STRING"

    def resolve_unknowns(node):
        # Anything never observed with a concrete value defaults to STRING.
        if node is None:
            return "STRING"
        if isinstance(node, dict):
            return {key: resolve_unknowns(item) for key, item in node.items()}
        return node

    if len(df) == 0:
        return {}

    schema = {}
    for col in df.columns:
        inferred = None
        for value in df[col]:
            inferred = merge_inferred(inferred, infer_field_type(value))
        schema[col] = inferred
    return resolve_unknowns(schema)

### 2. 신규 DF의 field 타겟 값 찾기

In [273]:
def find_tg_field(tg_schema):
    """Return the first key starting with ``_field`` under ``e.fields.d``.

    The search walks the ``tg_schema['e']['fields']['d']`` subtree recursively
    (dict values and dicts inside lists) and collects every key that matches
    ``^_field``, in dict iteration order.

    Args:
        tg_schema: schema dictionary containing ``['e']['fields']['d']``.

    Returns:
        The first matching key, or ``None`` when the subtree contains no such
        key (for example when the data has already been cleaned).

    Raises:
        KeyError: if ``tg_schema`` has no ``['e']['fields']['d']`` path.
    """
    # NOTE(review): only the first match is returned; if several `_field*` keys
    # exist (or the match sits in a nested record), the caller sees just one.
    pattern = re.compile(r'^_field')

    def find_keys(data):
        matches = []
        if isinstance(data, dict):
            for key, value in data.items():
                if pattern.match(key):
                    matches.append(key)
                if isinstance(value, dict):
                    matches.extend(find_keys(value))
                elif isinstance(value, list):
                    for item in value:
                        matches.extend(find_keys(item))
        return matches

    matches = find_keys(tg_schema['e']['fields']['d'])

    # An empty result used to raise IndexError; report "nothing to remove" instead.
    return matches[0] if matches else None

### 2. 신규 DF의 field Key-Value 삭제

In [297]:
def pop_error_new_df(df,tg):
    """Remove key *tg* in place from every ``d`` item under each row's ``e`` items.

    Prints the key being removed, then mutates the dictionaries stored in the
    DataFrame; nothing is returned.
    """
    print(f"삭제할 key는 {tg}")
    for _, record in df.iterrows():
        events = record['e']
        for event in events:
            details = event['d']
            for detail in details:
                if tg not in detail:
                    continue
                detail.pop(tg)

### 3. 기존 테이블 수집

In [275]:
def get_current_schema(table_ref):
    """Fetch the existing table via the module-level ``client`` and return its schema."""
    return client.get_table(table_ref).schema

### 4. 기존 테이블 스키마의 객체화

In [276]:
# 기존 스키마 추출
def convert_schema_fields(schema):
    """Convert a sequence of schema field objects into a nested schema dictionary.

    Each field must expose ``name``, ``field_type``, ``mode`` and ``fields``.

    * ``RECORD`` fields become ``{"type", "mode", "fields"}`` descriptors with
      their sub-fields converted recursively.
    * ``REPEATED`` fields of any scalar type also become descriptors, so the
      repeated mode is kept when the dictionary is converted back. Previously
      only ``STRING`` kept its mode and other repeated scalars were flattened
      to a bare type name.
    * All other fields become their bare type name.

    Args:
        schema: iterable of field objects (e.g. a table's schema).

    Returns:
        dict mapping field name to a type name or descriptor dict.
    """
    # NOTE(review): REQUIRED mode is still reduced to the bare type name, as before.
    def convert_field(field):
        if field.field_type == 'RECORD' or field.mode == 'REPEATED':
            subfields = {
                subfield.name: convert_field(subfield)
                for subfield in (field.fields or ())
            }
            return {
                "type": field.field_type,
                "mode": field.mode,
                "fields": subfields,
            }
        return field.field_type

    return {field.name: convert_field(field) for field in schema}

### 5. 신/구 스키마 병합
- 교집합 요소는 구, 신규는 신

In [277]:
def merge_schemas(cur, new):
    """Merge two schema dictionaries, preferring ``cur`` where both define a key.

    Keys present in both are merged recursively when both values are dicts;
    otherwise the value from ``cur`` is kept. Keys only in ``new`` are added.
    The result lists the keys of ``cur`` first (in their original order),
    followed by the keys only found in ``new``, so the field order is
    deterministic.

    Args:
        cur: current schema (dict) or a leaf type value.
        new: newly inferred schema (dict) or a leaf type value.

    Returns:
        The merged dict. When the arguments are not both dicts, ``cur`` is
        returned, or ``new`` if ``cur`` is ``None``.
    """
    if not (isinstance(cur, dict) and isinstance(new, dict)):
        # The original read the loop variable `key` here before it was ever
        # assigned, which raised UnboundLocalError for non-dict arguments.
        return cur if cur is not None else new

    merged = {}
    for key, cur_value in cur.items():
        if key in new and isinstance(cur_value, dict) and isinstance(new[key], dict):
            merged[key] = merge_schemas(cur_value, new[key])
        else:
            merged[key] = cur_value
    for key, new_value in new.items():
        if key not in cur:
            merged[key] = new_value
    return merged

### 6. 통합 스키마 정렬

In [278]:
def order_schema(schema, order):
    """Return *schema* with its keys arranged according to *order*.

    Keys listed in ``order`` come first, in that order. Keys of ``schema`` that
    ``order`` does not mention are appended afterwards in their existing order
    instead of being dropped, so reordering never removes a field from the
    schema. Entries of ``order`` that are not in ``schema`` are ignored.

    Args:
        schema: dict mapping field names to type descriptions.
        order: iterable of field names giving the preferred order.

    Returns:
        A new dict containing every entry of ``schema``.
    """
    ordered_schema = {key: schema[key] for key in order if key in schema}
    for key, value in schema.items():
        ordered_schema.setdefault(key, value)
    return ordered_schema

### 7. 통합 스키마의 BQ 스킴 변환

In [279]:
def convert_to_schema_fields(schema):
    """Turn a schema dictionary into a list of ``SchemaField`` objects.

    Plain values are used directly as the field type. Dict values are
    descriptors: ``"type"`` is required, ``"mode"`` defaults to ``"NULLABLE"``,
    and ``"fields"`` (default empty) is converted recursively.
    """
    def build(name, spec):
        if not isinstance(spec, dict):
            return SchemaField(name, spec)
        nested = convert_to_schema_fields(spec.get("fields", {}))
        field_mode = spec.get("mode", "NULLABLE")
        return SchemaField(name, spec["type"], mode=field_mode, fields=nested)

    return [build(name, spec) for name, spec in schema.items()]


### 8. 기존 BQ 테이블 스키마 업데이트

In [280]:
def update_table_schema(table_ref, merged_schema):
    """Replace the table's schema with *merged_schema* via the module-level ``client``.

    Prints a confirmation message that includes the schema that was applied.
    """
    target = client.get_table(table_ref)
    target.schema = merged_schema
    # Only the "schema" property is sent in the update.
    client.update_table(target, ["schema"])
    print(f"테이블 {table_ref}의 스키마가 업데이트되었습니다: {merged_schema}")
    

### 10. BQ 테이블로 현재 DF 업로드

In [281]:
def load_data_to_bigquery(df, table_ref):
    """Append a DataFrame to a BigQuery table, allowing new fields to be added.

    A fresh client is created for the call. The table's current schema is
    printed before the load job is started, and the function blocks until the
    job finishes.
    """
    client = bigquery.Client()

    # Append rows and let the load job add fields to the table schema.
    load_config = bigquery.LoadJobConfig(
        write_disposition="WRITE_APPEND",
        schema_update_options=["ALLOW_FIELD_ADDITION"],
    )

    # Schema check: show what the destination table looks like right now.
    print("Current table schema:", client.get_table(table_ref).schema)

    # Upload the data and wait for completion.
    load_job = client.load_table_from_dataframe(df, table_ref, job_config=load_config)
    load_job.result()
    print(f"테이블 {table_ref}에 데이터가 성공적으로 업로드되었습니다.")

load_data_to_bigquery(df, table_ref)

## 실행

In [291]:
# Restore `df` from the saved deep copy.
df = copy.deepcopy(df_copy)

In [289]:
# Identify the existing dataset and table.
dataset_id = 'airbridge_mart'
table_id = 'app_df_2'
# Fully qualified `project.dataset.table` id; the project id is hard-coded here.
table_ref = f'ballosodeuk.{dataset_id}.{table_id}'

In [470]:
# Convert Event_Date to pandas datetimes before inferring the schema.
df['Event_Date'] = pd.to_datetime(df.Event_Date)
# First inference pass, used only to locate the `_field*` key under e.d.
new_schema = infer_schema_from_dataframe(df)
tg_field = find_tg_field(new_schema)
# Remove that key from the data in place, then infer again from the cleaned data.
pop_error_new_df(df,tg_field)
new_schema = infer_schema_from_dataframe(df)
# Fetch the table's current schema and convert it to the same dictionary form.
cur_schema = get_current_schema(table_ref)
cur_schema = convert_schema_fields(cur_schema)
# Merge (current wins on conflicts), order the top level by the DataFrame columns,
# and convert back to SchemaField objects.
merged_schema = merge_schemas(cur_schema, new_schema)
merged_schema = order_schema(merged_schema, df.columns)
merged_schema = convert_to_schema_fields(merged_schema)
# Apply the schema to the table, then upload a slice of the data.
update_table_schema(table_ref, merged_schema)
# NOTE(review): `.loc[1:10]` is a label-based, inclusive slice, so the row
# labelled 0 is not uploaded — confirm this is intended.
load_data_to_bigquery(df.loc[1:10], table_ref)


BadRequest: 400 Invalid schema update. Cannot add fields (field: e.d._field_10); reason: invalid, message: Invalid schema update. Cannot add fields (field: e.d._field_10)

In [236]:
pd.api.types.is_bool_dtype(type(df.loc[0]['i']['Is_Re_engagement']))

True

In [407]:
list(df.sample(10)['e'])[0][0]['d'][0]

{'Timestamp': '2024-07-11T12:19:38+09:00',
 'Label': '상품상세_구매하기',
 'Action': '아이스 카페 아메리카노 T',
 'Value': 6429.0,
 'tester': 'true',
 'name': None,
 'price': None,
 'productID': None,
 'transactionID': None,
 'products_struct_alias': array([{'name': '아이스 카페 아메리카노 T', 'price': '6429', 'position': None}],
       dtype=object)}

In [167]:
df['e'][0][0]['d'][0]

{'Timestamp': '2024-07-11T15:57:48+09:00',
 'Label': '상품상세_구매하기',
 'Action': '동서)스타벅스커피라떼컵200ml',
 'Value': 3000.0,
 'tester': 'true',
 'name': None,
 'price': None,
 'productID': None,
 'transactionID': None,
 'products_struct_alias': array([{'name': '동서)스타벅스커피라떼컵200ml', 'price': '3000', 'position': None}],
       dtype=object)}

In [168]:
df['e']

0       [{'Label': '상품상세_구매하기', 'Action': '동서)스타벅스커피라떼...
1       [{'Label': '상품상세_구매하기', 'Action': '아이스 카페 아메리카...
2       [{'Label': '상품상세_구매하기', 'Action': '아메리카노(ICED)...
3       [{'Label': '상품상세_구매하기', 'Action': '농심)너구리얼큰(봉지...
4       [{'Label': '상품상세_구매하기', 'Action': '싸이버거', 'Eve...
                              ...                        
1471    [{'Label': '상품상세_구매하기', 'Action': '허니콤보+콜라1.25...
1472    [{'Label': '상품상세_구매하기', 'Action': '패밀리 아이스크림',...
1473    [{'Label': '상품상세_구매하기', 'Action': '패밀리 아이스크림',...
1474    [{'Label': '상품상세_구매하기', 'Action': '허니콤보+콜라1.25...
1475    [{'Label': '상품상세_구매하기', 'Action': '허니콤보+콜라1.25...
Name: e, Length: 1476, dtype: object

In [169]:
merged_schema

{'Event_Category': 'STRING',
 'Event_Count_Total': 'INTEGER',
 'Airbridge_Device_ID_Type': 'STRING',
 'Event_Date': 'STRING',
 'i': {'fields': {'Target_Event_Timestamp': 'STRING',
   'Client_IP_City': 'STRING',
   'Is_First_Event_per_Device_ID': 'BOOLEAN',
   'Device_Type': 'STRING',
   'Term_ID': 'STRING',
   'Client_IP_Country_Code': 'STRING',
   'Platform': 'STRING',
   'Is_First_Event_per_User_ID': 'BOOLEAN',
   'Ad_Creative_ID': 'STRING',
   'Campaign_ID': 'STRING',
   'Device_Model': 'STRING',
   'Ad_Group_ID': 'STRING',
   'Target_Event_Category': 'STRING',
   'Client_IP_Subdivision': 'STRING',
   'Is_Re_engagement': 'STRING',
   'Is_First_Target_Event_per_Device': 'STRING'},
  'type': 'RECORD',
  'mode': 'NULLABLE'},
 'Event_Value_Total': 'FLOAT',
 'e': {'fields': {'Event_Value_Sum': 'FLOAT',
   'Action': 'STRING',
   'Event_Count': 'INTEGER',
   'u': {'type': 'RECORD',
    'mode': 'REPEATED',
    'fields': {'User_ID': 'STRING'}},
   'Label': 'STRING',
   'd': {'fields': {'name

In [251]:
cur_schema

{'Event_Date': 'DATE',
 'Airbridge_Device_ID': 'STRING',
 'Airbridge_Device_ID_Type': 'STRING',
 'i': {'type': 'RECORD',
  'mode': 'NULLABLE',
  'fields': {'Device_Model': 'STRING',
   'Device_Type': 'STRING',
   'Platform': 'STRING',
   'Client_IP_Country_Code': 'STRING',
   'Client_IP_Subdivision': 'STRING',
   'Client_IP_City': 'STRING',
   'Campaign_ID': 'STRING',
   'Ad_Group_ID': 'STRING',
   'Ad_Creative_ID': 'STRING',
   'Term_ID': 'STRING',
   'Is_Re_engagement': 'BOOLEAN',
   'Is_First_Event_per_User_ID': 'BOOLEAN',
   'Is_First_Event_per_Device_ID': 'BOOLEAN',
   'Is_First_Target_Event_per_Device': 'BOOLEAN',
   'Target_Event_Timestamp': 'NUMERIC',
   'Target_Event_Category': 'STRING'}},
 'Event_Category': 'STRING',
 'Event_Value_Total': 'FLOAT',
 'Event_Count_Total': 'INTEGER',
 'e': {'type': 'RECORD',
  'mode': 'REPEATED',
  'fields': {'Label': 'STRING',
   'Action': 'STRING',
   'Event_Value_Sum': 'FLOAT',
   'Event_Count': 'INTEGER',
   'u': {'type': 'RECORD',
    'mode'

#### 테스트 (스크래치)

In [260]:
def merge_schemas(cur, new):
    """Merge two schema dictionaries, preferring ``cur`` where both define a key.

    Keys present in both are merged recursively when both values are dicts;
    otherwise the value from ``cur`` is kept. Keys only in ``new`` are added.
    The result lists the keys of ``cur`` first (in their original order),
    followed by the keys only found in ``new``, so the field order is
    deterministic.

    Args:
        cur: current schema (dict) or a leaf type value.
        new: newly inferred schema (dict) or a leaf type value.

    Returns:
        The merged dict. When the arguments are not both dicts, ``cur`` is
        returned, or ``new`` if ``cur`` is ``None``.
    """
    if not (isinstance(cur, dict) and isinstance(new, dict)):
        # The original read the loop variable `key` here before it was ever
        # assigned, which raised UnboundLocalError for non-dict arguments.
        return cur if cur is not None else new

    merged = {}
    for key, cur_value in cur.items():
        if key in new and isinstance(cur_value, dict) and isinstance(new[key], dict):
            merged[key] = merge_schemas(cur_value, new[key])
        else:
            merged[key] = cur_value
    for key, new_value in new.items():
        if key not in cur:
            merged[key] = new_value
    return merged

# Example usage: `cur` and `new` disagree on the type of 'Target_Event_Timestamp',
# agree on 'Event_Date', and each has a nested field the other lacks.
cur = {
    'Target_Event_Timestamp': 'NUMERIC',
    'Event_Date': 'STRING',
    'nested': {
        'field1': 'INTEGER',
        'field2': 'STRING'
    }
}

new = {
    'Target_Event_Timestamp': 'STRING',
    'Event_Date': 'STRING',
    'nested': {
        'field1': 'INTEGER',
        'field3': 'FLOAT'
    },
    'new_field': 'BOOLEAN'
}

# Note that this rebinds the notebook-level `merged_schema`.
merged_schema = merge_schemas(cur, new)
print(merged_schema)


{'Target_Event_Timestamp': 'NUMERIC', 'Event_Date': 'STRING', 'nested': {'field2': 'STRING', 'field1': 'INTEGER', 'field3': 'FLOAT'}, 'new_field': 'BOOLEAN'}


In [285]:
merged_schema

{'Event_Category': 'STRING',
 'Event_Count_Total': 'INTEGER',
 'Airbridge_Device_ID_Type': 'STRING',
 'Event_Date': 'DATE',
 'i': {'fields': {'Target_Event_Timestamp': 'NUMERIC',
   'Client_IP_City': 'STRING',
   'Is_First_Event_per_Device_ID': 'BOOLEAN',
   'Device_Type': 'STRING',
   'Term_ID': 'STRING',
   'Client_IP_Country_Code': 'STRING',
   'Platform': 'STRING',
   'Is_First_Event_per_User_ID': 'BOOLEAN',
   'Ad_Creative_ID': 'STRING',
   'Campaign_ID': 'STRING',
   'Device_Model': 'STRING',
   'Ad_Group_ID': 'STRING',
   'Target_Event_Category': 'STRING',
   'Client_IP_Subdivision': 'STRING',
   'Is_Re_engagement': 'BOOLEAN',
   'Is_First_Target_Event_per_Device': 'BOOLEAN'},
  'type': 'RECORD',
  'mode': 'NULLABLE'},
 'Event_Value_Total': 'FLOAT',
 'e': {'fields': {'Event_Value_Sum': 'FLOAT',
   'Action': 'STRING',
   'Event_Count': 'INTEGER',
   'u': {'fields': {'User_ID': 'STRING'},
    'type': 'RECORD',
    'mode': 'REPEATED'},
   'Label': 'STRING',
   'd': {'fields': {'nam

In [259]:
cur_schema

{'Event_Date': 'DATE',
 'Airbridge_Device_ID': 'STRING',
 'Airbridge_Device_ID_Type': 'STRING',
 'i': {'type': 'RECORD',
  'mode': 'NULLABLE',
  'fields': {'Device_Model': 'STRING',
   'Device_Type': 'STRING',
   'Platform': 'STRING',
   'Client_IP_Country_Code': 'STRING',
   'Client_IP_Subdivision': 'STRING',
   'Client_IP_City': 'STRING',
   'Campaign_ID': 'STRING',
   'Ad_Group_ID': 'STRING',
   'Ad_Creative_ID': 'STRING',
   'Term_ID': 'STRING',
   'Is_Re_engagement': 'BOOLEAN',
   'Is_First_Event_per_User_ID': 'BOOLEAN',
   'Is_First_Event_per_Device_ID': 'BOOLEAN',
   'Is_First_Target_Event_per_Device': 'BOOLEAN',
   'Target_Event_Timestamp': 'NUMERIC',
   'Target_Event_Category': 'STRING'}},
 'Event_Category': 'STRING',
 'Event_Value_Total': 'FLOAT',
 'Event_Count_Total': 'INTEGER',
 'e': {'type': 'RECORD',
  'mode': 'REPEATED',
  'fields': {'Label': 'STRING',
   'Action': 'STRING',
   'Event_Value_Sum': 'FLOAT',
   'Event_Count': 'INTEGER',
   'u': {'type': 'RECORD',
    'mode'

In [254]:
# Hand-written schema dictionary used for testing; it adds 'newnewnew' fields under
# `e.u` and `e.d.products_struct_alias` and types several `i` fields as STRING.
# NOTE(review): the 'Event_Category' value 'STRINGㅁ' carries a trailing Korean
# character — presumably a stray IME keystroke or a deliberate sentinel; confirm.
test_schema = {'Event_Count_Total': 'INTEGER',
 'Event_Category': 'STRINGㅁ',
 'Airbridge_Device_ID_Type': 'STRING',
 'Event_Date': 'DATE',
 'i': {'fields': {'Target_Event_Timestamp': 'STRING',
   'Client_IP_City': 'STRING',
   'Is_First_Event_per_Device_ID': 'BOOLEAN',
   'Device_Type': 'STRING',
   'Term_ID': 'STRING',
   'Client_IP_Country_Code': 'STRING',
   'Platform': 'STRING',
   'Is_First_Event_per_User_ID': 'BOOLEAN',
   'Ad_Creative_ID': 'STRING',
   'Campaign_ID': 'STRING',
   'Device_Model': 'STRING',
   'Ad_Group_ID': 'STRING',
   'Target_Event_Category': 'STRING',
   'Client_IP_Subdivision': 'STRING',
   'Is_Re_engagement': 'STRING',
   'Is_First_Target_Event_per_Device': 'STRING'},
  'type': 'RECORD'},
 'Event_Value_Total': 'FLOAT',
 'e': {'type': 'RECORD',
  'mode': 'REPEATED',
  'fields': {'Label': 'STRING',
   'Action': 'STRING',
   'Event_Value_Sum': 'FLOAT',
   'Event_Count': 'INTEGER',
   'u': {'type': 'RECORD',
    'mode': 'REPEATED',
    'fields': {'User_ID': 'STRING', 'newnewnew' : 'STRING'}},
   'd': {'type': 'RECORD',
    'mode': 'REPEATED',
    'fields': {'Timestamp': 'STRING',
     'Label': 'STRING',
     'Action': 'STRING',
     'Value': 'FLOAT',
     'tester': 'STRING',
     'name': 'STRING',
     'price': 'STRING',
     'productID': 'STRING',
     'transactionID': 'STRING',
     'products_struct_alias': {'type': 'RECORD',
      'mode': 'REPEATED',
      'fields': {'name': 'STRING',
       'price': 'STRING',
       'position': 'STRING',
       'newnewnew' : 'STRING'
       }}}}}},
 'Airbridge_Device_ID': 'STRING'}

In [302]:
cur_schema

{'Event_Date': 'DATE',
 'Airbridge_Device_ID': 'STRING',
 'Airbridge_Device_ID_Type': 'STRING',
 'i': {'type': 'RECORD',
  'mode': 'NULLABLE',
  'fields': {'Device_Model': 'STRING',
   'Device_Type': 'STRING',
   'Platform': 'STRING',
   'Client_IP_Country_Code': 'STRING',
   'Client_IP_Subdivision': 'STRING',
   'Client_IP_City': 'STRING',
   'Campaign_ID': 'STRING',
   'Ad_Group_ID': 'STRING',
   'Ad_Creative_ID': 'STRING',
   'Term_ID': 'STRING',
   'Is_Re_engagement': 'BOOLEAN',
   'Is_First_Event_per_User_ID': 'BOOLEAN',
   'Is_First_Event_per_Device_ID': 'BOOLEAN',
   'Is_First_Target_Event_per_Device': 'BOOLEAN',
   'Target_Event_Timestamp': 'NUMERIC',
   'Target_Event_Category': 'STRING'}},
 'Event_Category': 'STRING',
 'Event_Value_Total': 'FLOAT',
 'Event_Count_Total': 'INTEGER',
 'e': {'type': 'RECORD',
  'mode': 'REPEATED',
  'fields': {'Label': 'STRING',
   'Action': 'STRING',
   'Event_Value_Sum': 'FLOAT',
   'Event_Count': 'INTEGER',
   'u': {'type': 'RECORD',
    'mode'

In [None]:
# Scratch loop: each iteration draws a random 10-row sample and indexes into the
# first sampled row's first `e` item's first `d` item. The value is discarded and
# the loop variable `i` is unused.
for i in range(len(df)):
    list(df.sample(10)['e'])[0][0]['d'][0]

In [413]:
df.loc[0]['e'][1
               ]


{'Label': '상품상세_구매하기',
 'Action': '제주 말차 크림 프라푸치노 T',
 'Event_Value_Sum': 9000.0,
 'Event_Count': 1,
 'u': array([{'User_ID': '04432efd-e80f-4800-a0ba-ef4564abf3e2'}], dtype=object),
 'd': array([{'Timestamp': '2024-07-11T15:34:46+09:00', 'Label': '상품상세_구매하기', 'Action': '제주 말차 크림 프라푸치노 T', 'Value': 9000.0, 'tester': 'true', 'name': None, 'price': None, 'productID': None, 'transactionID': None, 'products_struct_alias': array([{'name': '제주 말차 크림 프라푸치노 T', 'price': '9000', 'position': None}],
              dtype=object)}                                                                                                                                                                                                                                                                                      ],
       dtype=object)}

In [453]:
df.loc[1:5]

Unnamed: 0,Event_Date,Airbridge_Device_ID,Airbridge_Device_ID_Type,i,Event_Category,Event_Value_Total,Event_Count_Total,e
1,2024-07-11,2B81606D-06E3-4554-9EA8-CED9801AC877,IDFV,"{'Device_Model': 'iPhone', 'Device_Type': 'mob...",Spend Credits (App),20714.0,1,"[{'Label': '상품상세_구매하기', 'Action': '아이스 카페 아메리카..."
2,2024-07-11,50f7e8d2-2d32-415f-9326-5599e8e6bcca,GAID,"{'Device_Model': 'SM-G996N', 'Device_Type': 'm...",Spend Credits (App),11143.0,3,"[{'Label': '상품상세_구매하기', 'Action': '아메리카노(ICED)..."
3,2024-07-11,a92b7b27-0da1-449b-bb85-e76710382bfc,GAID,"{'Device_Model': 'SM-S928N', 'Device_Type': 'm...",Spend Credits (App),4713.0,3,"[{'Label': '상품상세_구매하기', 'Action': '농심)너구리얼큰(봉지..."
4,2024-07-11,74b7ddb8-528e-47c6-8b3e-c4839771f84c,GAID,"{'Device_Model': 'SM-G781N', 'Device_Type': 'm...",Spend Credits (App),19713.0,3,"[{'Label': '상품상세_구매하기', 'Action': '싸이버거', 'Eve..."
5,2024-07-11,E4E6FBE5-1FF1-45A3-9375-5F37F8FC4D05,IDFA,"{'Device_Model': 'iPhone', 'Device_Type': 'mob...",Spend Credits (App),24143.0,1,"[{'Label': '상품상세_구매하기', 'Action': '아이스 카페 라떼 T..."


In [460]:
def check_for_field_10(df):
    """Report whether any ``d`` item under any row's ``e`` items has the field-10 key.

    Both ``'field_10'`` and ``'_field_10'`` are recognised, because the lookup is
    an exact key match and the key reported by BigQuery carries a leading
    underscore. The ``e`` and ``d`` containers may be lists, tuples or numpy
    arrays; the nested values in this data are numpy object arrays, which the
    previous ``isinstance(..., list)`` test rejected, so it always returned False.

    Args:
        df: DataFrame that may contain an ``e`` column of nested items.

    Returns:
        True (after printing the row label) on the first hit, otherwise False.
    """
    list_like = (list, tuple, np.ndarray)
    targets = ('field_10', '_field_10')
    for index, row in df.iterrows():
        if 'e' not in row or not isinstance(row['e'], list_like):
            continue
        for e_item in row['e']:
            if 'd' not in e_item or not isinstance(e_item['d'], list_like):
                continue
            for d_item in e_item['d']:
                if any(name in d_item for name in targets):
                    print(f"field_10 found in row {index}")
                    return True
    return False

# Usage example
# NOTE(review): this rebinds the notebook-level `df` to an empty DataFrame, so the
# check below has no rows to inspect, and later cells see the empty frame until
# `df` is rebuilt.
df = pd.DataFrame({
    # Example DataFrame contents go here
})

field_10_exists = check_for_field_10(df)
print("field_10 exists:", field_10_exists)


field_10 exists: False


In [467]:
table_ref

'ballosodeuk.airbridge_mart.app_df_2'

In [472]:
def load_data_to_bigquery(df, table_ref):
    """Append a DataFrame to a BigQuery table, allowing new fields to be added.

    A fresh client is created for the call. The table's current schema is
    printed before the load job is started, and the function blocks until the
    job finishes.
    """
    client = bigquery.Client()

    # Append rows and let the load job add fields to the table schema.
    load_config = bigquery.LoadJobConfig(
        write_disposition="WRITE_APPEND",
        schema_update_options=["ALLOW_FIELD_ADDITION"],
    )

    # Schema check: show what the destination table looks like right now.
    print("Current table schema:", client.get_table(table_ref).schema)

    # Upload the data and wait for completion.
    load_job = client.load_table_from_dataframe(df, table_ref, job_config=load_config)
    load_job.result()
    print(f"테이블 {table_ref}에 데이터가 성공적으로 업로드되었습니다.")

load_data_to_bigquery(df, table_ref)

Current table schema: [SchemaField('Event_Date', 'DATE', 'NULLABLE', None, None, (), None), SchemaField('Airbridge_Device_ID', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Airbridge_Device_ID_Type', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('i', 'RECORD', 'NULLABLE', None, None, (SchemaField('Device_Model', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Device_Type', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Platform', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Client_IP_Country_Code', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Client_IP_Subdivision', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Client_IP_City', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Campaign_ID', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Ad_Group_ID', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Ad_Creative_ID', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Term_

In [465]:
table_ref

'ballosodeuk.airbridge_mart.app_df_2'

In [462]:
# NOTE: at notebook scope `job` is the `google.cloud.bigquery.job` module imported
# at the top of the file, not a load job; the load job object is a local variable
# inside `load_data_to_bigquery`, so it is not reachable from here.
job.result()

AttributeError: module 'google.cloud.bigquery.job' has no attribute 'result'

In [471]:
def check_for_field(df, field_name):
    """Return True if any ``d`` item under any row's ``e`` items has key *field_name*.

    The first row with a hit is reported with a printed message; rows are
    scanned in DataFrame order and the scan stops at the first hit.
    """
    for row_label, record in df.iterrows():
        details = (detail for event in record['e'] for detail in event['d'])
        if any(field_name in detail for detail in details):
            print(f"Field {field_name} found in row {row_label}")
            return True
    return False

# Check for the offending key in the DataFrame. The key named in the BigQuery
# error is `_field_10` (leading underscore) and the lookup is an exact dict-key
# match, so searching for 'field_10' can never find it.
field_exists = check_for_field(df, '_field_10')
print("_field_10 exists:", field_exists)

def update_table_schema(table_ref, merged_schema):
    """Replace the table's schema with *merged_schema* and return the resulting schema.

    Uses the module-level ``client``; the returned schema is read from the table
    object that the update call hands back.
    """
    target = client.get_table(table_ref)
    target.schema = merged_schema
    # Only the "schema" property is sent in the update.
    updated = client.update_table(target, ["schema"])
    print(f"테이블 {table_ref}의 스키마가 업데이트되었습니다.")
    return updated.schema

def load_data_to_bigquery(df, table_ref):
    """Upload a DataFrame to a BigQuery table using the default load configuration.

    A fresh client is created for the call. The table's current schema is
    printed first, then the function blocks until the load job finishes.
    """
    client = bigquery.Client()

    # Schema check: show what the destination table looks like right now.
    print("Current table schema:", client.get_table(table_ref).schema)

    load_job = client.load_table_from_dataframe(df, table_ref)
    load_job.result()
    print(f"테이블 {table_ref}에 데이터가 성공적으로 업로드되었습니다.")

# Apply the merged schema to the table, then upload the rows labelled 1 through 10
# (`.loc` slicing is label-based and inclusive).
update_table_schema(table_ref, merged_schema)
load_data_to_bigquery(df.loc[1:10], table_ref)

field_10 exists: False
테이블 ballosodeuk.airbridge_mart.app_df_2의 스키마가 업데이트되었습니다.
Current table schema: [SchemaField('Event_Date', 'DATE', 'NULLABLE', None, None, (), None), SchemaField('Airbridge_Device_ID', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Airbridge_Device_ID_Type', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('i', 'RECORD', 'NULLABLE', None, None, (SchemaField('Device_Model', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Device_Type', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Platform', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Client_IP_Country_Code', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Client_IP_Subdivision', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Client_IP_City', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Campaign_ID', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Ad_Group_ID', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('

BadRequest: 400 Invalid schema update. Cannot add fields (field: e.d._field_10); reason: invalid, message: Invalid schema update. Cannot add fields (field: e.d._field_10)