-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3aed1c4
commit 7cde994
Showing
25 changed files
with
2,115 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,3 +10,4 @@ | |
^CONTRIBUTING.md | ||
^ISSUE_TEMPLATE.md | ||
^LICENSE | ||
^presentations.* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,26 @@ | ||
*.aux | ||
*.lof | ||
*.log | ||
*.lot | ||
*.fls | ||
*.out | ||
*.toc | ||
*.fmt | ||
*.fot | ||
*.cb | ||
*.cb2 | ||
*.nav | ||
*.pre | ||
*.snm | ||
*.vrb | ||
*.bak | ||
*.xls | ||
*.synctex.gz | ||
*.blg | ||
*.bbl | ||
.Rproj.user | ||
.Rhistory | ||
.RData | ||
inst/doc | ||
lingtypology.Rproj | ||
phonfieldwork.Rproj | ||
rsconnect |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,10 @@ | ||
Please paste the output of the following commands below if your issue is related to a technical bug. Thank you! | ||
|
||
<details> <summary> info about OS and package versions </summary> | ||
``` | ||
sessionInfo()$R.version$platform | ||
sessionInfo()$R.version$version.string | ||
packageVersion("rmarkdown") | ||
packageVersion("phonfieldwork") | ||
``` | ||
</details> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
191 changes: 191 additions & 0 deletions
191
...entations/2019.11.15_HSE_School_of_Linguistics/2019.11.15_HSE_SL_about_phonfieldworks.tex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
\documentclass[13pt, t]{beamer} | ||
% Presento style file | ||
\usepackage{config/presento} | ||
|
||
% custom command and packages | ||
\input{config/custom-command} | ||
|
||
\usepackage{color, colortbl} | ||
\setlength{\columnseprule}{0.4pt} | ||
|
||
\title{\Large \hspace{-0.5cm} Comparing phonological systems and \xcancel{syllable structure}\\ of Botlikh and Zilo Andi: a data-driven analysis} | ||
\author[shortname]{George Moroz} | ||
\institute[shortinst]{Linguistic Convergence Laboratory, NRU HSE, Moscow, Russia} | ||
\date{\begin{center} {\large 24 October 2019} \bigskip \\ {``Caucasian Languages: Typology and Diachrony'' in honor of M. E. Alekseev, \\ Institute of Linguistics RAS, Moscow}\\ \vfill Presentation is available here: {\large \href{https://tinyurl.com/y5xoks3l}{https://tinyurl.com/y5xoks3l} \hfill \includegraphics[height = 2.5cm]{images/01_qrcode}} \end{center}} | ||
|
||
\begin{document} | ||
|
||
\begin{frame}[plain] | ||
\maketitle | ||
\end{frame} | ||
|
||
\begin{frame}[c]{Phonological description: data-driven analysis} | ||
\begin{tabular}{ll|l} | ||
& \multicolumn{1}{c|}{\textbf{Traditional analysis}} & \multicolumn{1}{c}{\textbf{Data-driven analysis}} \\ \hline \pause | ||
1. & Done by trained linguist & Evaluated by trained linguist \\ \hline | ||
2. & Can be done from scratch & \begin{tabular}[c]{@{}l@{}} Previous description needed\\ (or at least prior expectations)\end{tabular} \\ \hline | ||
3. & Doesn't care about the amount of data & Cares more about the amount of data\\ \hline | ||
4. & Less reproducible & More reproducible \\ \hline | ||
5. & Cannot be automated & Can be automated | ||
\end{tabular} | ||
\vfill \pause | ||
Data-driven approach to phonological description and syllable structure analysis: | ||
\begin{itemize} | ||
\item was proposed in \citep{moroz2018} | ||
\item was applied to syllable structure in \citep{moroz2019} to Adyghe data | ||
\item was applied to syllable structure in \citep{romanova2019} to Russian and Macedonian data | ||
\end{itemize} | ||
\vfill \pause | ||
I will present an application of this method to Botlikh and Zilo Andi data | ||
\end{frame} | ||
|
||
\framepic{images/04_map}{\vspace{0.5cm} Andi and Botlikh villages, created with lingtypology package \citep{moroz2017}} | ||
|
||
\begin{frame} | ||
\begin{multicols}{2} | ||
\begin{itemize} | ||
\item Botlikh < Andic group < EC | ||
\item Unwritten (can be written with extended Cyrillic script for Avar) | ||
\item \textasciitilde{}5,000--8,000 speakers | ||
\item Mostly spoken in 3 villages in northwestern Daghestan (Russian Federation): Botlikh, Miarso, Ashino, (Ankho); minor dialectal differences | ||
\item One full reference grammar in Georgian \citep{gudava1962} | ||
\item Two dictionaries: \\ | ||
\citep{saidovaabusov2012}, \citep{alekseev2019} | ||
\end{itemize} | ||
\vfill | ||
\columnbreak | ||
\begin{itemize} | ||
\item Andi < Andic group < EC | ||
\item Unwritten (can be written with extended Cyrillic script for Avar) | ||
\item \textasciitilde{}16,500 speakers | ||
\item About 14 villages; There are two main dialect groups: Lower Andi (Muni, Kvankhidatli) and Upper Andi (the rest) | ||
\item Several reference grammars \citep{suleymanov57} (Rikvani), \citep{salimov10} (Gagatli), \citep{tsertsvadze65} (Andi) | ||
\item No dictionary except\\ \citep{kibrik1988} | ||
\end{itemize} | ||
\end{multicols} | ||
\end{frame} | ||
|
||
\begin{frame}{Comparing two Botlikh dictionaries} | ||
|
||
\begin{block}{\citep{saidovaabusov2012}} | ||
\begin{itemize} | ||
\item Compiled in the 2000s by a native speaker (M. G. Abusov) and an experienced linguist (P. A. Saidova) | ||
\item Mostly Botlikh with some notes on Miarso | ||
\end{itemize} | ||
\end{block} | ||
\pause | ||
\begin{block}{\citep{alekseev2019}} | ||
\begin{itemize} | ||
\item Compiled in the 1960s / 1970s by a native speaker / philologist (X. G. Azaev) and later (in the 2000s) systematized by an experienced linguist (M. E. Alekseev) | ||
\item Subsequently edited by T. A. Maisak and scheduled for posthumous publication later this year | ||
\item Botlikh only | ||
\end{itemize} | ||
\end{block} | ||
\end{frame} | ||
|
||
\begin{frame}{Comparing two Botlikh dictionaries} | ||
\begin{block}{Summary:} | ||
\begin{itemize} | ||
\item Dictionaries were compiled \textbf{independently} of each other | ||
\item with no metadata on the speakers consulted | ||
\item data collection was separated by a break of several decades | ||
\end{itemize} | ||
\end{block} | ||
\includegraphics[width=\linewidth]{images/05_dicts} | ||
\end{frame} | ||
|
||
\framepic{images/05_dicts}{\begin{itemize} | ||
\item Automatically merge two .doc files into one unified .xls file, \dots \pause | ||
\item Manually check for similarities (S. Verhees, C. Naccarato and me) | ||
\end{itemize} | ||
} | ||
|
||
\begin{frame}{Comparing two Botlikh dictionaries} | ||
\includegraphics[width=0.94\linewidth]{images/03_venn} | ||
\end{frame} | ||
|
||
\begin{frame}{Comparing two Botlikh dictionaries} | ||
\begin{itemize} | ||
\item If we remove the stress sign, there are only 2495 lexemes which look phonetically the same, and 395 are different (14\%) | ||
\item If we don't remove the stress sign, there are 2027 lexemes which look phonetically the same, and 863 are different (30\%) | ||
\item[\color{colorblue} $\Rightarrow$] 16\% of lexemes have a different stress pattern?\pause\ Including cases where stress is present in one dictionary and absent in the other. \pause | ||
\item What causes the difference between dictionaries? | ||
\begin{itemize} | ||
\item Stress pattern differences in 317 lexemes (about 11\%) | ||
\item Multiple cases where there is a small difference that could be explained either as a typo or in terms of phonological variation \\ | ||
\hspace{-5em}\textit{čuhí}~`to~run’~\citep{alekseev2019} vs. \textit{čũhí} \citep{saidovaabusov2012}, \\ | ||
\hspace{-5em}\textit{kusu} `cherry plum’ \citep{alekseev2019} vs. \textit{kusːu} \citep{saidovaabusov2012} | ||
\item Multiple cases where Russian borrowings were adopted differently \\ | ||
\hspace{-5em}\textit{awtobus} `bus’ \citep{alekseev2019} vs. \textit{abtabus} \citep{saidovaabusov2012}, \\ | ||
\hspace{-5em}\textit{biton} `milk can’ \citep{alekseev2019} vs. \textit{bitun}~\citep{saidovaabusov2012}, \\ | ||
\hspace{-5em}\textit{apteka} `pharmacy’ \citep{alekseev2019} vs. \textit{abteka}~\citep{saidovaabusov2012} | ||
\item Morphological preferences \\ | ||
\hspace{-5em}\textit{dinija=w} `pious’ \citep{alekseev2019} vs. \textit{dinija=b}~\citep{saidovaabusov2012} | ||
\end{itemize} | ||
\end{itemize} | ||
\end{frame} | ||
|
||
\begin{frame}{Comparing two Botlikh dictionaries} | ||
About 25 cases:\\ | ||
|
||
\begin{tabular}{lll} | ||
\citep{alekseev2019} &\citep{saidovaabusov2012} & \\ | ||
\textit{ãha\textbf{\underline{j}}r} & \textit{ãhar} & 'message’ \\ | ||
\textit{beʒa\textbf{\underline{j}}r} & \textit{beʒir} & 'roasting’ \\ | ||
\textit{mik'ku\textbf{\underline{j}}r} & \textit{mik'ːur} & 'swallowing’ \\ | ||
\textit{reqχu\textbf{\underline{j}}r} & \textit{reqχʷir} & 'fight’ \\ | ||
\textit{reʃku\textbf{\underline{j}}r} & \textit{reʃkur} & 'overnight stay’ \\ | ||
\textit{rikʷa\textbf{\underline{j}}r} & \textit{rikʷar} & 'lighting’ \\ \hline | ||
\textit{χwardar} & \textit{χwardir} & 'digging' \\ | ||
\textit{miʔar} & \textit{miʔar} & 'nose'\\ | ||
\dots & \dots & \dots \\ | ||
& & \\ | ||
About 6 cases: & & \\ | ||
\textit{ʃːalaj} & \textit{ʃːallaj} & 'silt' \\ | ||
\textit{inuʕala} & \textit{inuʕalla} & 'everywhere' \\ | ||
\textit{ʕila} & \textit{ʕilla} & 'reason' \\ | ||
\dots & \dots & \dots \\ | ||
\end{tabular} | ||
\end{frame} | ||
|
||
\framepic{images/06_botlikh_dicts_without_stress}{} | ||
|
||
\framepic{images/07_botlikh_dicts_with_stress}{} | ||
|
||
\begin{frame}{Zilo Andi data} | ||
Dictionary data for Zilo were collected during fieldtrips to Zilo in 2016--2019 with N. Rochant, S. Verhees, A. Martynova and A. Zakirova who contributed to the same FieldWorks project. | ||
\begin{itemize} | ||
\item Contain morphological affixes | ||
\item Don't contain additional affixes in the lemma form | ||
\item Contain different stems of the same lexeme (e. g. \textsc{sg.abs, sg.obl, pl.abs, pl.obl, pst, npst}) | ||
\item No information about stress | ||
\end{itemize} | ||
\end{frame} | ||
|
||
\framepic{images/08_zilo_sa}{} | ||
|
||
\framepic{images/09_zilo_aa}{} | ||
|
||
\begin{frame}{Discussion:} | ||
\begin{itemize} | ||
\item Botlikh dictionaries were specially selected for shared meaning; the same procedure was not applied to the Andic dictionary \pause | ||
\item Botlikh dictionaries contain a lot of borrowings; this is not true for the Andic dictionary \pause | ||
\item Lemmata are not the same as wordforms, so the model should be checked with the wordform material \pause | ||
\item Lemmata can contain some affix that will shift all frequencies (e.~g.~\textsc{inf}, \textsc{pl} or \textsc{=cl}) for some types of phonological units \pause | ||
\item It would be nice to compare the obtained models with models built on corpus data \pause, when/if such data become available \pause | ||
\item The model computed using raw frequencies should be extended with Markov chains and vector models (as in distributional semantics)\pause: only in this way will it be possible to compare \textbf{paradigmatic} and \textbf{syntagmatic} relations within the phonological systems and across languages | ||
\end{itemize} | ||
\end{frame} | ||
|
||
\framecard[colorblue]{{\color{colorwhite} \Large Send me a letter!\\ | ||
agricolamz@gmail.com\\ | ||
\vfill Presentation is available here: \\tinyurl.com/y3wtkcbq\\ | ||
\vfill \includegraphics[height = 4cm]{images/02_qrcode}}} | ||
|
||
\begin{frame}{References} | ||
\footnotesize | ||
\bibliographystyle{config/chicago} | ||
\bibliography{bibliography} | ||
\end{frame} | ||
|
||
\end{document} |
80 changes: 80 additions & 0 deletions
80
presentations/2019.11.15_HSE_School_of_Linguistics/bibliography.bib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
@book{gudava1962, | ||
title={Botlixuri ena [{T}he {B}otlikh language]}, | ||
author={Gudava, Togo E.}, | ||
year={1962}, | ||
publisher={Tbilisi: Mecniereba} | ||
} | ||
|
||
@Manual{moroz2017, | ||
title = {lingtypology: easy mapping for Linguistic Typology}, | ||
author = {George Moroz}, | ||
year = {2017}, | ||
url = {https://CRAN.R-project.org/package=lingtypology}, | ||
} | ||
|
||
@unpublished{romanova2019, | ||
title={Automatic {S}yllable {S}tructure {E}xtracting {F}rom {D}ictionaries: {S}lavic {D}ata}, | ||
author={Romanova, K. I.}, | ||
year={2019}, | ||
note = {Term paper} | ||
} | ||
|
||
@unpublished{moroz2018, | ||
title={lingphonology: automatic phonological description}, | ||
author={Moroz, G. A.}, | ||
year={2018}, | ||
note = {R package draft} | ||
} | ||
|
||
@article{moroz2019, | ||
title={Slogovaya struktura adygeyskogo yazika: ot dannyx k obosheniyam [{A}dyghe syllable structure: {F}rom empirical data to generalizations]}, | ||
author={Moroz, G. A.}, | ||
year={2019}, | ||
journal={Voprosy Jazykoznanija}, | ||
volume ={2}, | ||
pages={82--95} | ||
} | ||
@phdthesis{suleymanov57, | ||
title={Grammatičeskij očerk andijskogo jazyka (po dannim govora s. Rikvani) [Grammar sketch of the Andi language (based on material from the dialect of the village Rikvani)]}, | ||
author={Suleymanov, J. G.}, | ||
year={1957}, | ||
school={Institut Jazykoznania AN SSSR} | ||
} | ||
|
||
|
||
@book{salimov10, | ||
title={Gagatlinskij govor andijskogo jazyka [The Gagatli dialect of the Andi language]}, | ||
author={Salimov, X. S.}, | ||
publisher={Makhachkala}, | ||
year={2010 (1968)} | ||
} | ||
|
||
@book{kibrik1988, | ||
title={Sopostavitelnoye izucheniye dagestanskikh yazykov [Comparative study of Daghestanian languages]}, | ||
author={Kibrik, A. E. and Kodzasov, S. V.}, | ||
year={1988}, | ||
address = {Moscow}, | ||
publisher={Moscow State University} | ||
} | ||
|
||
|
||
@book{tsertsvadze65, | ||
title={Andiuri Ena}, | ||
author={Tsertsvadze, I. I.}, | ||
publisher={Tbilisi: Metsniereba}, | ||
year={1965} | ||
} | ||
|
||
@book{saidovaabusov2012, | ||
title={Botlixsko-russkij slovar' [{B}otlikh-{R}ussian dictionary]}, | ||
author={Saidova, Patimat A. and Abusov, Magomed G.}, | ||
year={2012}, | ||
publisher={Makhachkala: IJaLI}} | ||
@book{alekseev2019, | ||
title={Botlixsko-russkij slovar' [{B}otlikh-{R}ussian dictionary]}, | ||
author={Alekseev, M.E. and Azaev, X.G.}, | ||
year={2019}, | ||
publisher={Moscow: Academia}} |
Oops, something went wrong.