In [1]:
from dotenv import load_dotenv
load_dotenv()

import os
import pandas as pd
from sqlalchemy import create_engine

password = os.environ["MARIA_DB_PASS"]
connection = create_engine(f"mariadb+pymysql://flutter_app:{password}@82.223.54.117:3306/rubik_app_db")
# here we can reuse same connection and "never" close it, in an API you cannot because there can be collisions

from utils import clean_commmutator, commutator_to_sticker_commutator, unravel_commutator_wrt_master

## Read comms csvs guille

In [11]:
comms_guille = pd.read_excel("./edges_commutators_tratados.xlsx").drop_duplicates(["first_letter", "second_letter"])
comms_guille = comms_guille[["first_letter", "second_letter", "commutator"]].copy()
comms_guille.sample(5)

Unnamed: 0,first_letter,second_letter,commutator
484,Z,G,U'MU'M'U2M'UMU'
184,W,H,x L2 (Can M. KÑ. Can M') L2 x
305,Q,G,R (KG.) R'
706,S,W,uR (NC.) R'u'
538,W,Y,D'RDMD'R'DM''


## Clean comms

In [12]:
comms_guille["clean_commutator"] = comms_guille.commutator.apply(clean_commmutator)
comms_guille.sample(5)

Unnamed: 0,first_letter,second_letter,commutator,clean_commutator
11,Z,H,M'UL'U'MULU',M'UL'U'MULU'
21,C,Ñ,DR2U'M'UR2U'MUD',DR2U'M'UR2U'MUD'
206,D,F,L'UM2U'L2UM2U'L',L'UM2U'L2UM2U'L'
532,N,B,(Ins N.) M2' (Unins N.) M2',(Ins N.) M2' (Unins N.) M2'
78,K,Y,rM'U'R'UMU'RUr' Y),rM'U'R'UMU'RUr' Y)


## Sticker comms

edges o corners

In [13]:
do_edges = True

if do_edges:
    letters_guille = pd.read_sql("SELECT sticker, letter FROM edges_stickers WHERE user_name='flygorithm'", connection).set_index("letter").sticker
else:
    letters_guille = pd.read_sql("SELECT sticker, letter FROM corners_stickers WHERE user_name='flygorithm'", connection).set_index("letter").sticker

In [14]:
comms_guille["commutator_stickers"] = comms_guille.clean_commutator.apply(commutator_to_sticker_commutator, letter2sticker=letters_guille)
comms_guille["first_sticker"] = comms_guille.first_letter.apply(lambda l: letters_guille.get(l, ""))
comms_guille["second_sticker"] = comms_guille.second_letter.apply(lambda l: letters_guille.get(l, ""))

In [6]:
comms_guille = comms_guille[["first_sticker", "second_sticker", "commutator_stickers"]].set_index(["first_sticker", "second_sticker"])

In [15]:
comms_guille.sample(5)

Unnamed: 0,first_letter,second_letter,commutator,clean_commutator,commutator_stickers,first_sticker,second_sticker
210,G,D,L' (GN.) L,L' (GN) L,"L' {DB,UL} L",DB,FL
106,W,J,S' (WK.) S,S' (WK) S,"S' {RB,RU} S",RB,DR
1,H,Ñ,L'U'M'ULU'MU,L'U'M'ULU'MU,L'U'M'ULU'MU,LF,LU
138,J,Y,M (Ins J.) M (Ins J.) M2,M (Ins J.) M (Ins J.) M2,,DR,BD
3,J,Ñ,M'U'MD'M'UMD,M'U'MD'M'UMD,M'U'MD'M'UMD,DR,LU


In [16]:
comms_guille.shape

(436, 7)

## Estudiar los que no se parsean bien

In [35]:
buenos = comms_guille[comms_guille.commutator_stickers != ""]

In [19]:
malos = comms_guille[comms_guille.commutator_stickers == ""]

In [20]:
malos.shape

(95, 7)

Cuales son del tipo Ins o Unins

In [27]:
malos.clean_commutator.str.contains("ns").sum(), malos.clean_commutator.str.contains("ns").mean().round(2) 

(44, 0.46)

In [28]:
malos = malos[~malos.clean_commutator.str.contains("ns")]

In [29]:
malos.shape

(51, 7)

In [31]:
malos.clean_commutator.str.contains("LL").sum(), malos.clean_commutator.str.contains("LL").mean().round(2) 

(21, 0.41)

In [32]:
malos = malos[~malos.clean_commutator.str.contains("LL")]

In [33]:
malos.shape

(30, 7)

In [34]:
malos

Unnamed: 0,first_letter,second_letter,commutator,clean_commutator,commutator_stickers,first_sticker,second_sticker
126,C,J,U' (BJ. Can U'.),U' (BJ. Can U'.),,UR,DR
127,J,C,(Can U' JB.) U,(Can U' JB.) U,,DR,UR
184,W,H,x L2 (Can M. KÑ. Can M') L2 x,x L2 M' (KÑ. Can M') L2 x,,RB,LF
185,H,W,xL2 (Can M. ÑK. Can M'.) L2 x,xL2 M' (ÑK. Can M'.) L2 x,,LF,RB
248,M,K,u ('Z.) u',u ('Z.) u',,BR,RU
250,K,M,u (Z'.) u',u (Z'.) u',,RU,BR
258,C,M,U' (BM. Can U'.),U' (BM. Can U'.),,UR,BR
306,Y,Q,R (YK. Can M2 R') r2,R (YK. Can M2 R') r2,,BD,RF
350,T,N,(Can U' TB.) U',(Can U' TB.) U',,DL,UL
355,N,F,U' (BF. Can U.),U' (BF. Can U.),,UL,BL


In [132]:
comms_guille = comms_guille[comms_guille.commutator_stickers.apply(lambda c: c.strip()) != ""]

In [133]:
comms_guille.shape

(341, 1)

In [137]:
comms_guille["commutator_full"] = comms_guille.commutator_stickers.apply(unravel_commutator_wrt_master, all_comms=comms_guille)

  unraveled = all_comms.loc[first_sticker, second_sticker].iloc[0].iloc[0]


In [138]:
comms_guille = comms_guille[comms_guille.commutator_full.apply(lambda c: c.strip()) != ""]

In [139]:
comms_guille.shape

(292, 2)

In [140]:
comms_guille.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,commutator_stickers,commutator_full
first_sticker,second_sticker,Unnamed: 2_level_1,Unnamed: 3_level_1
BU,DB,UMU'M'U2M'UMU,UMU'M'U2M'UMU
LF,BR,"z' U2 {FU,RF} U2 z",z' U2 M'U'RUMU'R'U U2 z
UF,BL,"U' L {UR,UL} LU",U' L U'M'U2MU' LU
DL,BL,L2UM2U'L'UM2U'L',L2UM2U'L'UM2U'L'
LU,DB,U2MU'M'U2M'UM,U2MU'M'U2M'UM


In [141]:
from objects import Commutator

In [142]:
comms_guille.commutator_full = comms_guille.commutator_full.apply(lambda c: Commutator(c).full_form)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comms_guille.commutator_full = comms_guille.commutator_full.apply(lambda c: Commutator(c).full_form)


In [143]:
comms_guille["commutator_simplified"] = comms_guille.commutator_full.apply(lambda c: Commutator(c).simplified_form)
comms_guille.commutator_simplified = comms_guille.commutator_simplified.apply(lambda c: c if "+" not in c else "")
comms_guille.commutator_simplified = comms_guille.commutator_simplified.apply(lambda c: c if "Not" not in c else "")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comms_guille["commutator_simplified"] = comms_guille.commutator_full.apply(lambda c: Commutator(c).simplified_form)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comms_guille.commutator_simplified = comms_guille.commutator_simplified.apply(lambda c: c if "+" not in c else "")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returni

In [144]:
comms_guille.rename(columns={"commutator_full": "commutator"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comms_guille.rename(columns={"commutator_full": "commutator"}, inplace=True)


In [145]:
comms_guille["buffer_sticker"] = "DF"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comms_guille["buffer_sticker"] = "DF"


In [146]:
comms_guille = comms_guille.reset_index()[["buffer_sticker", "first_sticker", "second_sticker", "commutator", "commutator_simplified"]]

In [158]:
comms_guille.sample(5)

Unnamed: 0,buffer_sticker,first_sticker,second_sticker,commutator,commutator_simplified
125,DF,LB,FL,z M U L' U' M' U L U' z Z',
195,DF,LB,LU,L U' M' U L' U' M U,"[L, U' M' U]"
267,DF,UB,DB,u2 M u2 M,
252,DF,BD,LU,x M2 U L U' M' U L' U' M' ',
111,DF,LF,BL,z M' U' L' U M U' L U Z,


In [159]:
comms_guille.shape

(292, 5)

In [160]:
comms_guille = comms_guille[comms_guille.first_sticker != ""].copy()
comms_guille = comms_guille[comms_guille.second_sticker != ""].copy()

In [161]:
comms_guille.shape

(285, 5)

## Dump to ddbb

In [162]:
comms_guille.to_sql('edges_commutators', con=connection, if_exists='replace', index=False)

285

## Letter pairs dump

In [5]:
df = pd.read_csv("/home/sheriff/Downloads/lingot_pairs_export_extended.csv")

In [6]:
df["user_name"] = "sheriff"

In [7]:
df.head()

Unnamed: 0,first_letter,second_letter,word,user_name
0,Br,Br,bribri,sheriff
1,Br,Sh,brush,sheriff
2,Br,Ch,brocha,sheriff
3,Br,Ko,brecol,sheriff
4,Br,Ka,Blanca,sheriff


In [9]:
df.to_sql('letter_pairs', con=connection, if_exists='append', index=False)

419